aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2005-11-09 00:39:34 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2005-11-09 10:56:38 -0500
commita9701a30470856408d08657eb1bd7ae29a146190 (patch)
treeeb6ea8c82fdc1b50bf56abadeee63a935034cf27 /drivers/md/md.c
parentbd926c63b7a6843d3ce2728396c0891e54fce5c4 (diff)
[PATCH] md: support BIO_RW_BARRIER for md/raid1
We can only accept BARRIER requests if all slaves handle barriers, and that can, of course, change with time.... So we keep track of whether the whole array seems safe for barriers, and also whether each individual rdev handles barriers. We initially assume barriers are OK. When writing the superblock we try a barrier, and if that fails, we flag things for no-barriers. This will usually clear the flags fairly quickly. If writing the superblock finds that BIO_RW_BARRIER is -ENOTSUPP, we need to resubmit, so introduce function "md_super_wait" which waits for requests to finish, and retries ENOTSUPP requests without the barrier flag. When writing the real raid1, write requests which were BIO_RW_BARRIER but which aren't supported need to be retried. So raid1d is enhanced to do this, and when any bio write completes (i.e. no retry needed) we remove it from the r1bio, so that devices needing retry are easy to find. We should hardly ever get -ENOTSUPP errors when writing data to the raid. It should only happen if: 1/ the device used to support BARRIER, but now doesn't. Few devices change like this, though raid1 can! or 2/ the array has no persistent superblock, so there was no opportunity to pre-test for barriers when writing the superblock. Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c99
1 files changed, 83 insertions, 16 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index caa4add00c1b..199016932de5 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -330,18 +330,46 @@ static void free_disk_sb(mdk_rdev_t * rdev)
330static int super_written(struct bio *bio, unsigned int bytes_done, int error) 330static int super_written(struct bio *bio, unsigned int bytes_done, int error)
331{ 331{
332 mdk_rdev_t *rdev = bio->bi_private; 332 mdk_rdev_t *rdev = bio->bi_private;
333 mddev_t *mddev = rdev->mddev;
333 if (bio->bi_size) 334 if (bio->bi_size)
334 return 1; 335 return 1;
335 336
336 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) 337 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags))
337 md_error(rdev->mddev, rdev); 338 md_error(mddev, rdev);
338 339
339 if (atomic_dec_and_test(&rdev->mddev->pending_writes)) 340 if (atomic_dec_and_test(&mddev->pending_writes))
340 wake_up(&rdev->mddev->sb_wait); 341 wake_up(&mddev->sb_wait);
341 bio_put(bio); 342 bio_put(bio);
342 return 0; 343 return 0;
343} 344}
344 345
346static int super_written_barrier(struct bio *bio, unsigned int bytes_done, int error)
347{
348 struct bio *bio2 = bio->bi_private;
349 mdk_rdev_t *rdev = bio2->bi_private;
350 mddev_t *mddev = rdev->mddev;
351 if (bio->bi_size)
352 return 1;
353
354 if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
355 error == -EOPNOTSUPP) {
356 unsigned long flags;
357 /* barriers don't appear to be supported :-( */
358 set_bit(BarriersNotsupp, &rdev->flags);
359 mddev->barriers_work = 0;
360 spin_lock_irqsave(&mddev->write_lock, flags);
361 bio2->bi_next = mddev->biolist;
362 mddev->biolist = bio2;
363 spin_unlock_irqrestore(&mddev->write_lock, flags);
364 wake_up(&mddev->sb_wait);
365 bio_put(bio);
366 return 0;
367 }
368 bio_put(bio2);
369 bio->bi_private = rdev;
370 return super_written(bio, bytes_done, error);
371}
372
345void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, 373void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
346 sector_t sector, int size, struct page *page) 374 sector_t sector, int size, struct page *page)
347{ 375{
@@ -350,16 +378,54 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
350 * and decrement it on completion, waking up sb_wait 378 * and decrement it on completion, waking up sb_wait
351 * if zero is reached. 379 * if zero is reached.
352 * If an error occurred, call md_error 380 * If an error occurred, call md_error
381 *
382 * As we might need to resubmit the request if BIO_RW_BARRIER
383 * causes ENOTSUPP, we allocate a spare bio...
353 */ 384 */
354 struct bio *bio = bio_alloc(GFP_NOIO, 1); 385 struct bio *bio = bio_alloc(GFP_NOIO, 1);
386 int rw = (1<<BIO_RW) | (1<<BIO_RW_SYNC);
355 387
356 bio->bi_bdev = rdev->bdev; 388 bio->bi_bdev = rdev->bdev;
357 bio->bi_sector = sector; 389 bio->bi_sector = sector;
358 bio_add_page(bio, page, size, 0); 390 bio_add_page(bio, page, size, 0);
359 bio->bi_private = rdev; 391 bio->bi_private = rdev;
360 bio->bi_end_io = super_written; 392 bio->bi_end_io = super_written;
393 bio->bi_rw = rw;
394
361 atomic_inc(&mddev->pending_writes); 395 atomic_inc(&mddev->pending_writes);
362 submit_bio((1<<BIO_RW)|(1<<BIO_RW_SYNC), bio); 396 if (!test_bit(BarriersNotsupp, &rdev->flags)) {
397 struct bio *rbio;
398 rw |= (1<<BIO_RW_BARRIER);
399 rbio = bio_clone(bio, GFP_NOIO);
400 rbio->bi_private = bio;
401 rbio->bi_end_io = super_written_barrier;
402 submit_bio(rw, rbio);
403 } else
404 submit_bio(rw, bio);
405}
406
407void md_super_wait(mddev_t *mddev)
408{
409 /* wait for all superblock writes that were scheduled to complete.
410 * if any had to be retried (due to BARRIER problems), retry them
411 */
412 DEFINE_WAIT(wq);
413 for(;;) {
414 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
415 if (atomic_read(&mddev->pending_writes)==0)
416 break;
417 while (mddev->biolist) {
418 struct bio *bio;
419 spin_lock_irq(&mddev->write_lock);
420 bio = mddev->biolist;
421 mddev->biolist = bio->bi_next ;
422 bio->bi_next = NULL;
423 spin_unlock_irq(&mddev->write_lock);
424 submit_bio(bio->bi_rw, bio);
425 }
426 schedule();
427 }
428 finish_wait(&mddev->sb_wait, &wq);
363} 429}
364 430
365static int bi_complete(struct bio *bio, unsigned int bytes_done, int error) 431static int bi_complete(struct bio *bio, unsigned int bytes_done, int error)
@@ -1382,7 +1448,7 @@ static void md_update_sb(mddev_t * mddev)
1382 int sync_req; 1448 int sync_req;
1383 1449
1384repeat: 1450repeat:
1385 spin_lock(&mddev->write_lock); 1451 spin_lock_irq(&mddev->write_lock);
1386 sync_req = mddev->in_sync; 1452 sync_req = mddev->in_sync;
1387 mddev->utime = get_seconds(); 1453 mddev->utime = get_seconds();
1388 mddev->events ++; 1454 mddev->events ++;
@@ -1405,11 +1471,11 @@ repeat:
1405 */ 1471 */
1406 if (!mddev->persistent) { 1472 if (!mddev->persistent) {
1407 mddev->sb_dirty = 0; 1473 mddev->sb_dirty = 0;
1408 spin_unlock(&mddev->write_lock); 1474 spin_unlock_irq(&mddev->write_lock);
1409 wake_up(&mddev->sb_wait); 1475 wake_up(&mddev->sb_wait);
1410 return; 1476 return;
1411 } 1477 }
1412 spin_unlock(&mddev->write_lock); 1478 spin_unlock_irq(&mddev->write_lock);
1413 1479
1414 dprintk(KERN_INFO 1480 dprintk(KERN_INFO
1415 "md: updating %s RAID superblock on device (in sync %d)\n", 1481 "md: updating %s RAID superblock on device (in sync %d)\n",
@@ -1437,17 +1503,17 @@ repeat:
1437 /* only need to write one superblock... */ 1503 /* only need to write one superblock... */
1438 break; 1504 break;
1439 } 1505 }
1440 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0); 1506 md_super_wait(mddev);
1441 /* if there was a failure, sb_dirty was set to 1, and we re-write super */ 1507 /* if there was a failure, sb_dirty was set to 1, and we re-write super */
1442 1508
1443 spin_lock(&mddev->write_lock); 1509 spin_lock_irq(&mddev->write_lock);
1444 if (mddev->in_sync != sync_req|| mddev->sb_dirty == 1) { 1510 if (mddev->in_sync != sync_req|| mddev->sb_dirty == 1) {
1445 /* have to write it out again */ 1511 /* have to write it out again */
1446 spin_unlock(&mddev->write_lock); 1512 spin_unlock_irq(&mddev->write_lock);
1447 goto repeat; 1513 goto repeat;
1448 } 1514 }
1449 mddev->sb_dirty = 0; 1515 mddev->sb_dirty = 0;
1450 spin_unlock(&mddev->write_lock); 1516 spin_unlock_irq(&mddev->write_lock);
1451 wake_up(&mddev->sb_wait); 1517 wake_up(&mddev->sb_wait);
1452 1518
1453} 1519}
@@ -1989,6 +2055,7 @@ static int do_md_run(mddev_t * mddev)
1989 2055
1990 mddev->recovery = 0; 2056 mddev->recovery = 0;
1991 mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ 2057 mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
2058 mddev->barriers_work = 1;
1992 2059
1993 /* before we start the array running, initialise the bitmap */ 2060 /* before we start the array running, initialise the bitmap */
1994 err = bitmap_create(mddev); 2061 err = bitmap_create(mddev);
@@ -2107,7 +2174,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
2107 mddev->ro = 1; 2174 mddev->ro = 1;
2108 } else { 2175 } else {
2109 bitmap_flush(mddev); 2176 bitmap_flush(mddev);
2110 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0); 2177 md_super_wait(mddev);
2111 if (mddev->ro) 2178 if (mddev->ro)
2112 set_disk_ro(disk, 0); 2179 set_disk_ro(disk, 0);
2113 blk_queue_make_request(mddev->queue, md_fail_request); 2180 blk_queue_make_request(mddev->queue, md_fail_request);
@@ -3796,13 +3863,13 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
3796 3863
3797 atomic_inc(&mddev->writes_pending); 3864 atomic_inc(&mddev->writes_pending);
3798 if (mddev->in_sync) { 3865 if (mddev->in_sync) {
3799 spin_lock(&mddev->write_lock); 3866 spin_lock_irq(&mddev->write_lock);
3800 if (mddev->in_sync) { 3867 if (mddev->in_sync) {
3801 mddev->in_sync = 0; 3868 mddev->in_sync = 0;
3802 mddev->sb_dirty = 1; 3869 mddev->sb_dirty = 1;
3803 md_wakeup_thread(mddev->thread); 3870 md_wakeup_thread(mddev->thread);
3804 } 3871 }
3805 spin_unlock(&mddev->write_lock); 3872 spin_unlock_irq(&mddev->write_lock);
3806 } 3873 }
3807 wait_event(mddev->sb_wait, mddev->sb_dirty==0); 3874 wait_event(mddev->sb_wait, mddev->sb_dirty==0);
3808} 3875}
@@ -4112,7 +4179,7 @@ void md_check_recovery(mddev_t *mddev)
4112 if (mddev_trylock(mddev)==0) { 4179 if (mddev_trylock(mddev)==0) {
4113 int spares =0; 4180 int spares =0;
4114 4181
4115 spin_lock(&mddev->write_lock); 4182 spin_lock_irq(&mddev->write_lock);
4116 if (mddev->safemode && !atomic_read(&mddev->writes_pending) && 4183 if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
4117 !mddev->in_sync && mddev->recovery_cp == MaxSector) { 4184 !mddev->in_sync && mddev->recovery_cp == MaxSector) {
4118 mddev->in_sync = 1; 4185 mddev->in_sync = 1;
@@ -4120,7 +4187,7 @@ void md_check_recovery(mddev_t *mddev)
4120 } 4187 }
4121 if (mddev->safemode == 1) 4188 if (mddev->safemode == 1)
4122 mddev->safemode = 0; 4189 mddev->safemode = 0;
4123 spin_unlock(&mddev->write_lock); 4190 spin_unlock_irq(&mddev->write_lock);
4124 4191
4125 if (mddev->sb_dirty) 4192 if (mddev->sb_dirty)
4126 md_update_sb(mddev); 4193 md_update_sb(mddev);