Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--  drivers/md/raid1.c | 134
1 file changed, 92 insertions(+), 42 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index fb6b866c28f5..1cbf51fbd43f 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -301,7 +301,7 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int error)
 {
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
-	int mirror, behind;
+	int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
 	conf_t *conf = mddev_to_conf(r1_bio->mddev);
 
 	if (bio->bi_size)
@@ -311,47 +311,54 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int error)
 		if (r1_bio->bios[mirror] == bio)
 			break;
 
-	/*
-	 * this branch is our 'one mirror IO has finished' event handler:
-	 */
-	if (!uptodate) {
-		md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
-		/* an I/O failed, we can't clear the bitmap */
-		set_bit(R1BIO_Degraded, &r1_bio->state);
-	} else
+	if (error == -ENOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
+		set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags);
+		set_bit(R1BIO_BarrierRetry, &r1_bio->state);
+		r1_bio->mddev->barriers_work = 0;
+	} else {
 		/*
-		 * Set R1BIO_Uptodate in our master bio, so that
-		 * we will return a good error code for to the higher
-		 * levels even if IO on some other mirrored buffer fails.
-		 *
-		 * The 'master' represents the composite IO operation to
-		 * user-side. So if something waits for IO, then it will
-		 * wait for the 'master' bio.
+		 * this branch is our 'one mirror IO has finished' event handler:
 		 */
-		set_bit(R1BIO_Uptodate, &r1_bio->state);
-
-	update_head_pos(mirror, r1_bio);
-
-	behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
-	if (behind) {
-		if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
-			atomic_dec(&r1_bio->behind_remaining);
-
-		/* In behind mode, we ACK the master bio once the I/O has safely
-		 * reached all non-writemostly disks. Setting the Returned bit
-		 * ensures that this gets done only once -- we don't ever want to
-		 * return -EIO here, instead we'll wait */
-
-		if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
-		    test_bit(R1BIO_Uptodate, &r1_bio->state)) {
-			/* Maybe we can return now */
-			if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
-				struct bio *mbio = r1_bio->master_bio;
-				PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
-				       (unsigned long long) mbio->bi_sector,
-				       (unsigned long long) mbio->bi_sector +
-				       (mbio->bi_size >> 9) - 1);
-				bio_endio(mbio, mbio->bi_size, 0);
+		r1_bio->bios[mirror] = NULL;
+		bio_put(bio);
+		if (!uptodate) {
+			md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
+			/* an I/O failed, we can't clear the bitmap */
+			set_bit(R1BIO_Degraded, &r1_bio->state);
+		} else
+			/*
+			 * Set R1BIO_Uptodate in our master bio, so that
+			 * we will return a good error code for to the higher
+			 * levels even if IO on some other mirrored buffer fails.
+			 *
+			 * The 'master' represents the composite IO operation to
+			 * user-side. So if something waits for IO, then it will
+			 * wait for the 'master' bio.
+			 */
+			set_bit(R1BIO_Uptodate, &r1_bio->state);
+
+		update_head_pos(mirror, r1_bio);
+
+		if (behind) {
+			if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
+				atomic_dec(&r1_bio->behind_remaining);
+
+			/* In behind mode, we ACK the master bio once the I/O has safely
+			 * reached all non-writemostly disks. Setting the Returned bit
+			 * ensures that this gets done only once -- we don't ever want to
+			 * return -EIO here, instead we'll wait */
+
+			if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
+			    test_bit(R1BIO_Uptodate, &r1_bio->state)) {
+				/* Maybe we can return now */
+				if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
+					struct bio *mbio = r1_bio->master_bio;
+					PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
+					       (unsigned long long) mbio->bi_sector,
+					       (unsigned long long) mbio->bi_sector +
+					       (mbio->bi_size >> 9) - 1);
+					bio_endio(mbio, mbio->bi_size, 0);
+				}
 			}
 		}
 	}
@@ -361,8 +368,16 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int error)
 	 * already.
 	 */
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
+		if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
+			reschedule_retry(r1_bio);
+			/* Don't dec_pending yet, we want to hold
+			 * the reference over the retry
+			 */
+			return 0;
+		}
 		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
 			/* free extra copy of the data pages */
+/* FIXME bio has been freed!!! */
 			int i = bio->bi_vcnt;
 			while (i--)
 				__free_page(bio->bi_io_vec[i].bv_page);
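The completion-handler hunks above implement the failure half of barrier support: a write that was submitted with BIO_RW_BARRIER and comes back with -ENOTSUPP is a protocol failure, not a media error, so instead of calling md_error() the handler records that this rdev cannot do barriers, marks the r1bio for retry, and, once the last sub-write completes, hands the whole r1bio to raid1d while deliberately keeping its reference alive. A minimal userspace sketch of that pattern follows; every name in it (mirrored_write, sub_write_done, queue_for_retry) is hypothetical, not the kernel API.

#include <errno.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical stand-in for an r1bio: one master write fanned out
 * to several mirrors, completed when 'remaining' drops to zero. */
struct mirrored_write {
	atomic_int remaining;          /* sub-writes still in flight */
	bool was_barrier;              /* submitted with a barrier flag */
	bool retry_without_barrier;    /* like R1BIO_BarrierRetry */
	bool degraded;                 /* like R1BIO_Degraded */
};

void queue_for_retry(struct mirrored_write *w);   /* like reschedule_retry() */
void complete_master(struct mirrored_write *w);

void sub_write_done(struct mirrored_write *w, int error)
{
	if (error == -ENOTSUPP && w->was_barrier) {
		/* The barrier, not the data, was rejected: schedule a
		 * plain rewrite rather than degrading the mirror. */
		w->retry_without_barrier = true;
	} else if (error) {
		w->degraded = true;    /* a real I/O error */
	}

	if (atomic_fetch_sub(&w->remaining, 1) != 1)
		return;                /* other mirrors still in flight */

	if (w->retry_without_barrier) {
		/* Hold the reference across the retry, as the patch's
		 * "Don't dec_pending yet" comment notes. */
		queue_for_retry(w);
		return;
	}
	complete_master(w);
}

In the real handler, successfully completed mirrors also have their r1_bio->bios[] entry cleared and their bio put, which is what later lets raid1d see exactly which devices still need the resubmit. That bio_put() is also why the patch carries its own "/* FIXME bio has been freed!!! */": the write-behind cleanup a few lines later still reads bio->bi_vcnt from a bio that may already be gone.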
@@ -648,8 +663,9 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	struct bio_list bl;
 	struct page **behind_pages = NULL;
 	const int rw = bio_data_dir(bio);
+	int do_barriers;
 
-	if (unlikely(bio_barrier(bio))) {
+	if (unlikely(!mddev->barriers_work && bio_barrier(bio))) {
 		bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
 		return 0;
 	}
@@ -759,6 +775,10 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	atomic_set(&r1_bio->remaining, 0);
 	atomic_set(&r1_bio->behind_remaining, 0);
 
+	do_barriers = bio->bi_rw & BIO_RW_BARRIER;
+	if (do_barriers)
+		set_bit(R1BIO_Barrier, &r1_bio->state);
+
 	bio_list_init(&bl);
 	for (i = 0; i < disks; i++) {
 		struct bio *mbio;
@@ -771,7 +791,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
 		mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
 		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
 		mbio->bi_end_io = raid1_end_write_request;
-		mbio->bi_rw = WRITE;
+		mbio->bi_rw = WRITE | do_barriers;
 		mbio->bi_private = r1_bio;
 
 		if (behind_pages) {
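On the submission side the patch is optimistic: barrier writes are rejected up front only after a failed barrier has cleared mddev->barriers_work, and until then the caller's barrier flag is simply propagated onto every mirror's write via "mbio->bi_rw = WRITE | do_barriers". A hedged sketch of that gating, with hypothetical names (array_state, submit_mirrored_write, submit_one_mirror):

#include <errno.h>
#include <stdbool.h>

/* Hypothetical array-wide flag, like mddev->barriers_work: assumed
 * true until some member device actually fails a barrier write. */
struct array_state {
	bool barriers_work;
};

int submit_one_mirror(struct array_state *a, int mirror, unsigned flags);

/* Returns 0 on acceptance, -EOPNOTSUPP once barriers are known broken. */
int submit_mirrored_write(struct array_state *a, int nmirrors, bool barrier)
{
	/* Reject early only after a barrier has already failed once,
	 * mirroring "!mddev->barriers_work && bio_barrier(bio)". */
	if (barrier && !a->barriers_work)
		return -EOPNOTSUPP;

	for (int i = 0; i < nmirrors; i++) {
		/* Each per-mirror write inherits the barrier bit, like
		 * "mbio->bi_rw = WRITE | do_barriers". */
		unsigned flags = barrier ? 1u : 0u;  /* 1u: stand-in barrier bit */
		submit_one_mirror(a, i, flags);
	}
	return 0;
}

The first -ENOTSUPP completion then clears barriers_work (see the first hunk), so only the very first barrier request per array pays the discovery cost; everything after that is rejected synchronously.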
@@ -1153,6 +1173,36 @@ static void raid1d(mddev_t *mddev)
 		if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
 			sync_request_write(mddev, r1_bio);
 			unplug = 1;
+		} else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
+			/* some requests in the r1bio were BIO_RW_BARRIER
+			 * requests which failed with -ENOTSUPP. Hohumm..
+			 * Better resubmit without the barrier.
+			 * We know which devices to resubmit for, because
+			 * all others have had their bios[] entry cleared.
+			 */
+			int i;
+			clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
+			clear_bit(R1BIO_Barrier, &r1_bio->state);
+			for (i=0; i < conf->raid_disks; i++)
+				if (r1_bio->bios[i]) {
+					struct bio_vec *bvec;
+					int j;
+
+					bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
+					/* copy pages from the failed bio, as
+					 * this might be a write-behind device */
+					__bio_for_each_segment(bvec, bio, j, 0)
+						bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page;
+					bio_put(r1_bio->bios[i]);
+					bio->bi_sector = r1_bio->sector +
+						conf->mirrors[i].rdev->data_offset;
+					bio->bi_bdev = conf->mirrors[i].rdev->bdev;
+					bio->bi_end_io = raid1_end_write_request;
+					bio->bi_rw = WRITE;
+					bio->bi_private = r1_bio;
+					r1_bio->bios[i] = bio;
+					generic_make_request(bio);
+				}
 		} else {
 			int disk;
 			bio = r1_bio->bios[r1_bio->read_disk];
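The raid1d hunk is the retry half: any bios[] slot still populated identifies a mirror whose barrier write failed, so the thread clones the master bio, grafts in the failed bio's pages (they may be the private write-behind copies rather than the caller's pages), drops the barrier bits from the r1bio state, and resubmits a plain write. A small sketch of that clone-and-resubmit step, again with hypothetical names (req, submit, put_req, retry_without_barrier):

#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for a bio: a page-pointer list plus flags. */
struct req {
	void   **pages;
	size_t   npages;
	bool     barrier;
};

void submit(struct req *r);        /* like generic_make_request() */
void put_req(struct req *r);       /* like bio_put() */

/* Resubmit a failed barrier write as a plain write, reusing its pages. */
struct req *retry_without_barrier(struct req *failed)
{
	struct req *r = calloc(1, sizeof(*r));
	if (!r)
		return NULL;           /* the kernel path allocates with GFP_NOIO */

	r->pages = calloc(failed->npages, sizeof(*r->pages));
	if (!r->pages) {
		free(r);
		return NULL;
	}
	/* Point at the same pages, as the __bio_for_each_segment loop
	 * above does: the data is reused, not copied. */
	memcpy(r->pages, failed->pages, failed->npages * sizeof(*r->pages));
	r->npages = failed->npages;
	r->barrier = false;            /* the point of the retry */

	put_req(failed);               /* like bio_put(r1_bio->bios[i]) */
	submit(r);
	return r;
}

Reusing the pages rather than the master bio's vector matters for write-behind: the failed bio may carry the privately allocated behind pages, and a clone of the master alone would write the wrong (possibly already-released) buffers.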