diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/md.c | 1 | ||||
-rw-r--r-- | drivers/md/raid1.c | 115 |
2 files changed, 106 insertions, 10 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 64e7da3701a5..1364a1c97e6f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -461,6 +461,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size, | |||
461 | bio_put(bio); | 461 | bio_put(bio); |
462 | return ret; | 462 | return ret; |
463 | } | 463 | } |
464 | EXPORT_SYMBOL(sync_page_io); | ||
464 | 465 | ||
465 | static int read_disk_sb(mdk_rdev_t * rdev, int size) | 466 | static int read_disk_sb(mdk_rdev_t * rdev, int size) |
466 | { | 467 | { |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c618015f07f6..b3856db8d6c2 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -191,6 +191,7 @@ static void reschedule_retry(r1bio_t *r1_bio) | |||
191 | 191 | ||
192 | spin_lock_irqsave(&conf->device_lock, flags); | 192 | spin_lock_irqsave(&conf->device_lock, flags); |
193 | list_add(&r1_bio->retry_list, &conf->retry_list); | 193 | list_add(&r1_bio->retry_list, &conf->retry_list); |
194 | conf->nr_queued ++; | ||
194 | spin_unlock_irqrestore(&conf->device_lock, flags); | 195 | spin_unlock_irqrestore(&conf->device_lock, flags); |
195 | 196 | ||
196 | wake_up(&conf->wait_barrier); | 197 | wake_up(&conf->wait_barrier); |
@@ -245,9 +246,9 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int | |||
245 | /* | 246 | /* |
246 | * this branch is our 'one mirror IO has finished' event handler: | 247 | * this branch is our 'one mirror IO has finished' event handler: |
247 | */ | 248 | */ |
248 | if (!uptodate) | 249 | update_head_pos(mirror, r1_bio); |
249 | md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); | 250 | |
250 | else | 251 | if (uptodate || conf->working_disks <= 1) { |
251 | /* | 252 | /* |
252 | * Set R1BIO_Uptodate in our master bio, so that | 253 | * Set R1BIO_Uptodate in our master bio, so that |
253 | * we will return a good error code to the higher | 254 | * we will return a good error code to the higher |
@@ -259,14 +260,8 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int | |||
259 | */ | 260 | */ |
260 | set_bit(R1BIO_Uptodate, &r1_bio->state); | 261 | set_bit(R1BIO_Uptodate, &r1_bio->state); |
261 | 262 | ||
262 | update_head_pos(mirror, r1_bio); | ||
263 | |||
264 | /* | ||
265 | * we have only one bio on the read side | ||
266 | */ | ||
267 | if (uptodate) | ||
268 | raid_end_bio_io(r1_bio); | 263 | raid_end_bio_io(r1_bio); |
269 | else { | 264 | } else { |
270 | /* | 265 | /* |
271 | * oops, read error: | 266 | * oops, read error: |
272 | */ | 267 | */ |
@@ -653,6 +648,32 @@ static void allow_barrier(conf_t *conf) | |||
653 | wake_up(&conf->wait_barrier); | 648 | wake_up(&conf->wait_barrier); |
654 | } | 649 | } |
655 | 650 | ||
651 | static void freeze_array(conf_t *conf) | ||
652 | { | ||
653 | /* stop syncio and normal IO and wait for everything to | ||
654 | * go quiet. | ||
655 | * We increment barrier and nr_waiting, and then | ||
656 | * wait until barrier+nr_pending match nr_queued+2 | ||
657 | */ | ||
658 | spin_lock_irq(&conf->resync_lock); | ||
659 | conf->barrier++; | ||
660 | conf->nr_waiting++; | ||
661 | wait_event_lock_irq(conf->wait_barrier, | ||
662 | conf->barrier+conf->nr_pending == conf->nr_queued+2, | ||
663 | conf->resync_lock, | ||
664 | raid1_unplug(conf->mddev->queue)); | ||
665 | spin_unlock_irq(&conf->resync_lock); | ||
666 | } | ||
667 | static void unfreeze_array(conf_t *conf) | ||
668 | { | ||
669 | /* reverse the effect of the freeze */ | ||
670 | spin_lock_irq(&conf->resync_lock); | ||
671 | conf->barrier--; | ||
672 | conf->nr_waiting--; | ||
673 | wake_up(&conf->wait_barrier); | ||
674 | spin_unlock_irq(&conf->resync_lock); | ||
675 | } | ||
676 | |||
656 | 677 | ||
657 | /* duplicate the data pages for behind I/O */ | 678 | /* duplicate the data pages for behind I/O */ |
658 | static struct page **alloc_behind_pages(struct bio *bio) | 679 | static struct page **alloc_behind_pages(struct bio *bio) |
@@ -1196,6 +1217,7 @@ static void raid1d(mddev_t *mddev) | |||
1196 | break; | 1217 | break; |
1197 | r1_bio = list_entry(head->prev, r1bio_t, retry_list); | 1218 | r1_bio = list_entry(head->prev, r1bio_t, retry_list); |
1198 | list_del(head->prev); | 1219 | list_del(head->prev); |
1220 | conf->nr_queued--; | ||
1199 | spin_unlock_irqrestore(&conf->device_lock, flags); | 1221 | spin_unlock_irqrestore(&conf->device_lock, flags); |
1200 | 1222 | ||
1201 | mddev = r1_bio->mddev; | 1223 | mddev = r1_bio->mddev; |
@@ -1235,6 +1257,74 @@ static void raid1d(mddev_t *mddev) | |||
1235 | } | 1257 | } |
1236 | } else { | 1258 | } else { |
1237 | int disk; | 1259 | int disk; |
1260 | |||
1261 | /* we got a read error. Maybe the drive is bad. Maybe just | ||
1262 | * the block and we can fix it. | ||
1263 | * We freeze all other IO, and try reading the block from | ||
1264 | * other devices. When we find one, we re-write | ||
1265 | * and check if that fixes the read error. | ||
1266 | * This is all done synchronously while the array is | ||
1267 | * frozen | ||
1268 | */ | ||
1269 | sector_t sect = r1_bio->sector; | ||
1270 | int sectors = r1_bio->sectors; | ||
1271 | freeze_array(conf); | ||
1272 | while(sectors) { | ||
1273 | int s = sectors; | ||
1274 | int d = r1_bio->read_disk; | ||
1275 | int success = 0; | ||
1276 | |||
1277 | if (s > (PAGE_SIZE>>9)) | ||
1278 | s = PAGE_SIZE >> 9; | ||
1279 | |||
1280 | do { | ||
1281 | rdev = conf->mirrors[d].rdev; | ||
1282 | if (rdev && | ||
1283 | test_bit(In_sync, &rdev->flags) && | ||
1284 | sync_page_io(rdev->bdev, | ||
1285 | sect + rdev->data_offset, | ||
1286 | s<<9, | ||
1287 | conf->tmppage, READ)) | ||
1288 | success = 1; | ||
1289 | else { | ||
1290 | d++; | ||
1291 | if (d == conf->raid_disks) | ||
1292 | d = 0; | ||
1293 | } | ||
1294 | } while (!success && d != r1_bio->read_disk); | ||
1295 | |||
1296 | if (success) { | ||
1297 | /* write it back and re-read */ | ||
1298 | while (d != r1_bio->read_disk) { | ||
1299 | if (d==0) | ||
1300 | d = conf->raid_disks; | ||
1301 | d--; | ||
1302 | rdev = conf->mirrors[d].rdev; | ||
1303 | if (rdev && | ||
1304 | test_bit(In_sync, &rdev->flags)) { | ||
1305 | if (sync_page_io(rdev->bdev, | ||
1306 | sect + rdev->data_offset, | ||
1307 | s<<9, conf->tmppage, WRITE) == 0 || | ||
1308 | sync_page_io(rdev->bdev, | ||
1309 | sect + rdev->data_offset, | ||
1310 | s<<9, conf->tmppage, READ) == 0) { | ||
1311 | /* Well, this device is dead */ | ||
1312 | md_error(mddev, rdev); | ||
1313 | } | ||
1314 | } | ||
1315 | } | ||
1316 | } else { | ||
1317 | /* Cannot read from anywhere -- bye bye array */ | ||
1318 | md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); | ||
1319 | break; | ||
1320 | } | ||
1321 | sectors -= s; | ||
1322 | sect += s; | ||
1323 | } | ||
1324 | |||
1325 | |||
1326 | unfreeze_array(conf); | ||
1327 | |||
1238 | bio = r1_bio->bios[r1_bio->read_disk]; | 1328 | bio = r1_bio->bios[r1_bio->read_disk]; |
1239 | if ((disk=read_balance(conf, r1_bio)) == -1) { | 1329 | if ((disk=read_balance(conf, r1_bio)) == -1) { |
1240 | printk(KERN_ALERT "raid1: %s: unrecoverable I/O" | 1330 | printk(KERN_ALERT "raid1: %s: unrecoverable I/O" |
@@ -1529,6 +1619,10 @@ static int run(mddev_t *mddev) | |||
1529 | 1619 | ||
1530 | memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks); | 1620 | memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks); |
1531 | 1621 | ||
1622 | conf->tmppage = alloc_page(GFP_KERNEL); | ||
1623 | if (!conf->tmppage) | ||
1624 | goto out_no_mem; | ||
1625 | |||
1532 | conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL); | 1626 | conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL); |
1533 | if (!conf->poolinfo) | 1627 | if (!conf->poolinfo) |
1534 | goto out_no_mem; | 1628 | goto out_no_mem; |
@@ -1635,6 +1729,7 @@ out_free_conf: | |||
1635 | if (conf->r1bio_pool) | 1729 | if (conf->r1bio_pool) |
1636 | mempool_destroy(conf->r1bio_pool); | 1730 | mempool_destroy(conf->r1bio_pool); |
1637 | kfree(conf->mirrors); | 1731 | kfree(conf->mirrors); |
1732 | __free_page(conf->tmppage); | ||
1638 | kfree(conf->poolinfo); | 1733 | kfree(conf->poolinfo); |
1639 | kfree(conf); | 1734 | kfree(conf); |
1640 | mddev->private = NULL; | 1735 | mddev->private = NULL; |