Diffstat (limited to 'drivers')
-rw-r--r--  drivers/md/bitmap.c    5
-rw-r--r--  drivers/md/md.c       99
-rw-r--r--  drivers/md/raid1.c   134
3 files changed, 177 insertions, 61 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 220273e81ed6..51315302a85e 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -301,7 +301,7 @@ static int write_sb_page(mddev_t *mddev, long offset, struct page *page, int wai
 		       page);
 
 	if (wait)
-		wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
+		md_super_wait(mddev);
 	return 0;
 }
 
@@ -828,8 +828,7 @@ int bitmap_unplug(struct bitmap *bitmap)
 				wake_up_process(bitmap->writeback_daemon->tsk));
 			spin_unlock_irq(&bitmap->write_lock);
 		} else
-			wait_event(bitmap->mddev->sb_wait,
-				   atomic_read(&bitmap->mddev->pending_writes)==0);
+			md_super_wait(bitmap->mddev);
 	}
 	return 0;
 }
diff --git a/drivers/md/md.c b/drivers/md/md.c
index caa4add00c1b..199016932de5 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -330,18 +330,46 @@ static void free_disk_sb(mdk_rdev_t * rdev)
 static int super_written(struct bio *bio, unsigned int bytes_done, int error)
 {
 	mdk_rdev_t *rdev = bio->bi_private;
+	mddev_t *mddev = rdev->mddev;
 	if (bio->bi_size)
 		return 1;
 
 	if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags))
-		md_error(rdev->mddev, rdev);
+		md_error(mddev, rdev);
 
-	if (atomic_dec_and_test(&rdev->mddev->pending_writes))
-		wake_up(&rdev->mddev->sb_wait);
+	if (atomic_dec_and_test(&mddev->pending_writes))
+		wake_up(&mddev->sb_wait);
 	bio_put(bio);
 	return 0;
 }
 
+static int super_written_barrier(struct bio *bio, unsigned int bytes_done, int error)
+{
+	struct bio *bio2 = bio->bi_private;
+	mdk_rdev_t *rdev = bio2->bi_private;
+	mddev_t *mddev = rdev->mddev;
+	if (bio->bi_size)
+		return 1;
+
+	if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+	    error == -EOPNOTSUPP) {
+		unsigned long flags;
+		/* barriers don't appear to be supported :-( */
+		set_bit(BarriersNotsupp, &rdev->flags);
+		mddev->barriers_work = 0;
+		spin_lock_irqsave(&mddev->write_lock, flags);
+		bio2->bi_next = mddev->biolist;
+		mddev->biolist = bio2;
+		spin_unlock_irqrestore(&mddev->write_lock, flags);
+		wake_up(&mddev->sb_wait);
+		bio_put(bio);
+		return 0;
+	}
+	bio_put(bio2);
+	bio->bi_private = rdev;
+	return super_written(bio, bytes_done, error);
+}
+
 void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
 		    sector_t sector, int size, struct page *page)
 {
@@ -350,16 +378,54 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
 	 * and decrement it on completion, waking up sb_wait
 	 * if zero is reached.
 	 * If an error occurred, call md_error
+	 *
+	 * As we might need to resubmit the request if BIO_RW_BARRIER
+	 * causes ENOTSUPP, we allocate a spare bio...
 	 */
 	struct bio *bio = bio_alloc(GFP_NOIO, 1);
+	int rw = (1<<BIO_RW) | (1<<BIO_RW_SYNC);
 
 	bio->bi_bdev = rdev->bdev;
 	bio->bi_sector = sector;
 	bio_add_page(bio, page, size, 0);
 	bio->bi_private = rdev;
 	bio->bi_end_io = super_written;
+	bio->bi_rw = rw;
+
 	atomic_inc(&mddev->pending_writes);
-	submit_bio((1<<BIO_RW)|(1<<BIO_RW_SYNC), bio);
+	if (!test_bit(BarriersNotsupp, &rdev->flags)) {
+		struct bio *rbio;
+		rw |= (1<<BIO_RW_BARRIER);
+		rbio = bio_clone(bio, GFP_NOIO);
+		rbio->bi_private = bio;
+		rbio->bi_end_io = super_written_barrier;
+		submit_bio(rw, rbio);
+	} else
+		submit_bio(rw, bio);
+}
+
+void md_super_wait(mddev_t *mddev)
+{
+	/* wait for all superblock writes that were scheduled to complete.
+	 * if any had to be retried (due to BARRIER problems), retry them
+	 */
+	DEFINE_WAIT(wq);
+	for(;;) {
+		prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
+		if (atomic_read(&mddev->pending_writes)==0)
+			break;
+		while (mddev->biolist) {
+			struct bio *bio;
+			spin_lock_irq(&mddev->write_lock);
+			bio = mddev->biolist;
+			mddev->biolist = bio->bi_next ;
+			bio->bi_next = NULL;
+			spin_unlock_irq(&mddev->write_lock);
+			submit_bio(bio->bi_rw, bio);
+		}
+		schedule();
+	}
+	finish_wait(&mddev->sb_wait, &wq);
 }
 
 static int bi_complete(struct bio *bio, unsigned int bytes_done, int error)
@@ -1382,7 +1448,7 @@ static void md_update_sb(mddev_t * mddev)
 	int sync_req;
 
 repeat:
-	spin_lock(&mddev->write_lock);
+	spin_lock_irq(&mddev->write_lock);
 	sync_req = mddev->in_sync;
 	mddev->utime = get_seconds();
 	mddev->events ++;
@@ -1405,11 +1471,11 @@ repeat:
 	 */
 	if (!mddev->persistent) {
 		mddev->sb_dirty = 0;
-		spin_unlock(&mddev->write_lock);
+		spin_unlock_irq(&mddev->write_lock);
 		wake_up(&mddev->sb_wait);
 		return;
 	}
-	spin_unlock(&mddev->write_lock);
+	spin_unlock_irq(&mddev->write_lock);
 
 	dprintk(KERN_INFO
 		"md: updating %s RAID superblock on device (in sync %d)\n",
@@ -1437,17 +1503,17 @@ repeat:
 			/* only need to write one superblock... */
 			break;
 	}
-	wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
+	md_super_wait(mddev);
 	/* if there was a failure, sb_dirty was set to 1, and we re-write super */
 
-	spin_lock(&mddev->write_lock);
+	spin_lock_irq(&mddev->write_lock);
 	if (mddev->in_sync != sync_req|| mddev->sb_dirty == 1) {
 		/* have to write it out again */
-		spin_unlock(&mddev->write_lock);
+		spin_unlock_irq(&mddev->write_lock);
 		goto repeat;
 	}
 	mddev->sb_dirty = 0;
-	spin_unlock(&mddev->write_lock);
+	spin_unlock_irq(&mddev->write_lock);
 	wake_up(&mddev->sb_wait);
 
 }
@@ -1989,6 +2055,7 @@ static int do_md_run(mddev_t * mddev)
 
 	mddev->recovery = 0;
 	mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
+	mddev->barriers_work = 1;
 
 	/* before we start the array running, initialise the bitmap */
 	err = bitmap_create(mddev);
@@ -2107,7 +2174,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
 			mddev->ro = 1;
 	} else {
 		bitmap_flush(mddev);
-		wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
+		md_super_wait(mddev);
 		if (mddev->ro)
 			set_disk_ro(disk, 0);
 		blk_queue_make_request(mddev->queue, md_fail_request);
@@ -3796,13 +3863,13 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
 
 	atomic_inc(&mddev->writes_pending);
 	if (mddev->in_sync) {
-		spin_lock(&mddev->write_lock);
+		spin_lock_irq(&mddev->write_lock);
 		if (mddev->in_sync) {
 			mddev->in_sync = 0;
 			mddev->sb_dirty = 1;
 			md_wakeup_thread(mddev->thread);
 		}
-		spin_unlock(&mddev->write_lock);
+		spin_unlock_irq(&mddev->write_lock);
 	}
 	wait_event(mddev->sb_wait, mddev->sb_dirty==0);
 }
@@ -4112,7 +4179,7 @@ void md_check_recovery(mddev_t *mddev)
 	if (mddev_trylock(mddev)==0) {
 		int spares =0;
 
-		spin_lock(&mddev->write_lock);
+		spin_lock_irq(&mddev->write_lock);
 		if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
 		    !mddev->in_sync && mddev->recovery_cp == MaxSector) {
 			mddev->in_sync = 1;
@@ -4120,7 +4187,7 @@ void md_check_recovery(mddev_t *mddev)
 		}
 		if (mddev->safemode == 1)
 			mddev->safemode = 0;
-		spin_unlock(&mddev->write_lock);
+		spin_unlock_irq(&mddev->write_lock);
 
 		if (mddev->sb_dirty)
 			md_update_sb(mddev);
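
The md.c hunks above boil down to a try-then-fall-back pattern: every superblock write is first issued through a cloned bio with the barrier flag set, and if the lower device rejects it with -EOPNOTSUPP the completion handler records that barriers do not work, parks the original (non-barrier) bio on mddev->biolist, and md_super_wait() resubmits it before going back to sleep. Below is a minimal user-space sketch of that control flow, collapsed into synchronous form for clarity; submit_write(), barriers_work and the -1 return value are hypothetical stand-ins for the kernel API, not part of the patch.

/* Minimal sketch (hypothetical names): try the barrier variant first,
 * remember that it is unsupported, and fall back to a plain write.
 * Mirrors the BarriersNotsupp / biolist flow in the patch above. */
#include <stdbool.h>
#include <stdio.h>

static bool barriers_work = true;          /* like mddev->barriers_work */

/* pretend device: rejects any barrier write */
static int submit_write(int data, bool barrier)
{
	if (barrier)
		return -1;                 /* -EOPNOTSUPP analogue */
	printf("wrote %d (plain)\n", data);
	return 0;
}

static void super_write(int data)
{
	if (barriers_work && submit_write(data, true) == 0)
		return;                    /* barrier write succeeded */
	barriers_work = false;             /* like setting BarriersNotsupp */
	submit_write(data, false);         /* resubmit without the barrier */
}

int main(void)
{
	super_write(1);                    /* falls back to a plain write */
	super_write(2);                    /* skips the barrier attempt */
	return 0;
}

In the kernel version the fall-back is asynchronous: super_written_barrier() queues the spare bio on mddev->biolist and md_super_wait() drains that list, which is why callers such as md_update_sb(), do_md_stop() and write_sb_page() now call md_super_wait() instead of a bare wait_event().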
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index fb6b866c28f5..1cbf51fbd43f 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -301,7 +301,7 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 {
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
-	int mirror, behind;
+	int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
 	conf_t *conf = mddev_to_conf(r1_bio->mddev);
 
 	if (bio->bi_size)
@@ -311,47 +311,54 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 		if (r1_bio->bios[mirror] == bio)
 			break;
 
-	/*
-	 * this branch is our 'one mirror IO has finished' event handler:
-	 */
-	if (!uptodate) {
-		md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
-		/* an I/O failed, we can't clear the bitmap */
-		set_bit(R1BIO_Degraded, &r1_bio->state);
-	} else
-		/*
-		 * Set R1BIO_Uptodate in our master bio, so that
-		 * we will return a good error code for to the higher
-		 * levels even if IO on some other mirrored buffer fails.
-		 *
-		 * The 'master' represents the composite IO operation to
-		 * user-side. So if something waits for IO, then it will
-		 * wait for the 'master' bio.
-		 */
-		set_bit(R1BIO_Uptodate, &r1_bio->state);
-
-	update_head_pos(mirror, r1_bio);
-
-	behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
-	if (behind) {
-		if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
-			atomic_dec(&r1_bio->behind_remaining);
-
-		/* In behind mode, we ACK the master bio once the I/O has safely
-		 * reached all non-writemostly disks. Setting the Returned bit
-		 * ensures that this gets done only once -- we don't ever want to
-		 * return -EIO here, instead we'll wait */
-
-		if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
-		    test_bit(R1BIO_Uptodate, &r1_bio->state)) {
-			/* Maybe we can return now */
-			if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
-				struct bio *mbio = r1_bio->master_bio;
-				PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
-				       (unsigned long long) mbio->bi_sector,
-				       (unsigned long long) mbio->bi_sector +
-				       (mbio->bi_size >> 9) - 1);
-				bio_endio(mbio, mbio->bi_size, 0);
+	if (error == -ENOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
+		set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags);
+		set_bit(R1BIO_BarrierRetry, &r1_bio->state);
+		r1_bio->mddev->barriers_work = 0;
+	} else {
+		/*
+		 * this branch is our 'one mirror IO has finished' event handler:
+		 */
+		r1_bio->bios[mirror] = NULL;
+		bio_put(bio);
+		if (!uptodate) {
+			md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
+			/* an I/O failed, we can't clear the bitmap */
+			set_bit(R1BIO_Degraded, &r1_bio->state);
+		} else
+			/*
+			 * Set R1BIO_Uptodate in our master bio, so that
+			 * we will return a good error code for to the higher
+			 * levels even if IO on some other mirrored buffer fails.
+			 *
+			 * The 'master' represents the composite IO operation to
+			 * user-side. So if something waits for IO, then it will
+			 * wait for the 'master' bio.
+			 */
+			set_bit(R1BIO_Uptodate, &r1_bio->state);
+
+		update_head_pos(mirror, r1_bio);
+
+		if (behind) {
+			if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
+				atomic_dec(&r1_bio->behind_remaining);
+
+			/* In behind mode, we ACK the master bio once the I/O has safely
+			 * reached all non-writemostly disks. Setting the Returned bit
+			 * ensures that this gets done only once -- we don't ever want to
+			 * return -EIO here, instead we'll wait */
+
+			if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
+			    test_bit(R1BIO_Uptodate, &r1_bio->state)) {
+				/* Maybe we can return now */
+				if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
+					struct bio *mbio = r1_bio->master_bio;
+					PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
+					       (unsigned long long) mbio->bi_sector,
+					       (unsigned long long) mbio->bi_sector +
+					       (mbio->bi_size >> 9) - 1);
+					bio_endio(mbio, mbio->bi_size, 0);
+				}
 			}
 		}
 	}
@@ -361,8 +368,16 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 	 * already.
 	 */
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
+		if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
+			reschedule_retry(r1_bio);
+			/* Don't dec_pending yet, we want to hold
+			 * the reference over the retry
+			 */
+			return 0;
+		}
 		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
 			/* free extra copy of the data pages */
+			/* FIXME bio has been freed!!! */
 			int i = bio->bi_vcnt;
 			while (i--)
 				__free_page(bio->bi_io_vec[i].bv_page);
@@ -648,8 +663,9 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	struct bio_list bl;
 	struct page **behind_pages = NULL;
 	const int rw = bio_data_dir(bio);
+	int do_barriers;
 
-	if (unlikely(bio_barrier(bio))) {
+	if (unlikely(!mddev->barriers_work && bio_barrier(bio))) {
 		bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
 		return 0;
 	}
@@ -759,6 +775,10 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	atomic_set(&r1_bio->remaining, 0);
 	atomic_set(&r1_bio->behind_remaining, 0);
 
+	do_barriers = bio->bi_rw & BIO_RW_BARRIER;
+	if (do_barriers)
+		set_bit(R1BIO_Barrier, &r1_bio->state);
+
 	bio_list_init(&bl);
 	for (i = 0; i < disks; i++) {
 		struct bio *mbio;
@@ -771,7 +791,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
 		mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
 		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
 		mbio->bi_end_io = raid1_end_write_request;
-		mbio->bi_rw = WRITE;
+		mbio->bi_rw = WRITE | do_barriers;
 		mbio->bi_private = r1_bio;
 
 		if (behind_pages) {
@@ -1153,6 +1173,36 @@ static void raid1d(mddev_t *mddev)
 		if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
 			sync_request_write(mddev, r1_bio);
 			unplug = 1;
+		} else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
+			/* some requests in the r1bio were BIO_RW_BARRIER
+			 * requests which failed with -ENOTSUPP.  Hohumm..
+			 * Better resubmit without the barrier.
+			 * We know which devices to resubmit for, because
+			 * all others have had their bios[] entry cleared.
+			 */
+			int i;
+			clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
+			clear_bit(R1BIO_Barrier, &r1_bio->state);
+			for (i=0; i < conf->raid_disks; i++)
+				if (r1_bio->bios[i]) {
+					struct bio_vec *bvec;
+					int j;
+
+					bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
+					/* copy pages from the failed bio, as
+					 * this might be a write-behind device */
+					__bio_for_each_segment(bvec, bio, j, 0)
+						bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page;
+					bio_put(r1_bio->bios[i]);
+					bio->bi_sector = r1_bio->sector +
+						conf->mirrors[i].rdev->data_offset;
+					bio->bi_bdev = conf->mirrors[i].rdev->bdev;
+					bio->bi_end_io = raid1_end_write_request;
+					bio->bi_rw = WRITE;
+					bio->bi_private = r1_bio;
+					r1_bio->bios[i] = bio;
+					generic_make_request(bio);
+				}
 		} else {
 			int disk;
 			bio = r1_bio->bios[r1_bio->read_disk];
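
raid1.c applies the same idea per mirror: a barrier write that fails with -ENOTSUPP is not treated as a disk failure; the r1_bio is flagged R1BIO_BarrierRetry, and once the last mirror write completes, raid1d resubmits every surviving bios[] entry without the barrier bit. The sketch below condenses that dispatch; struct r1_request, mirror_write() and end_write() are hypothetical illustrations of the control flow, not the kernel structures.

/* Condensed sketch of the raid1 barrier-retry dispatch (hypothetical
 * types/names; not the kernel API).  A failed barrier write is flagged
 * for retry rather than counted as a device failure, and the retry
 * path resubmits the remaining writes without the barrier. */
#include <stdbool.h>
#include <stdio.h>

#define MIRRORS 2

struct r1_request {
	bool barrier;            /* like R1BIO_Barrier */
	bool barrier_retry;      /* like R1BIO_BarrierRetry */
	int  pending;            /* like r1_bio->remaining */
	bool submitted[MIRRORS]; /* like r1_bio->bios[] being non-NULL */
};

/* pretend device: rejects barrier writes */
static int mirror_write(int mirror, bool barrier)
{
	(void)mirror;
	return barrier ? -1 : 0; /* -ENOTSUPP analogue */
}

static void end_write(struct r1_request *req, int mirror, int err)
{
	if (err && req->barrier)
		req->barrier_retry = true;       /* keep the entry for retry */
	else
		req->submitted[mirror] = false;  /* normal completion path */

	if (--req->pending == 0 && req->barrier_retry) {
		/* raid1d's job: resubmit the survivors without the barrier */
		req->barrier = req->barrier_retry = false;
		for (int i = 0; i < MIRRORS; i++)
			if (req->submitted[i])
				printf("retry mirror %d: %d\n", i,
				       mirror_write(i, false));
	}
}

int main(void)
{
	struct r1_request req = { .barrier = true, .pending = MIRRORS,
				  .submitted = { true, true } };
	for (int i = 0; i < MIRRORS; i++)
		end_write(&req, i, mirror_write(i, req.barrier));
	return 0;
}

The design point preserved here is that the retry is deferred until the reference count reaches zero, so each write is resubmitted exactly once even when several mirrors reject the barrier.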