 -rw-r--r--  drivers/md/raid10.c          | 127
 -rw-r--r--  include/linux/raid/raid10.h  |   2
 2 files changed, 114 insertions(+), 15 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 1fa70c34b7d2..64bb4ddc6798 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -209,6 +209,7 @@ static void reschedule_retry(r10bio_t *r10_bio)
 
 	spin_lock_irqsave(&conf->device_lock, flags);
 	list_add(&r10_bio->retry_list, &conf->retry_list);
+	conf->nr_queued ++;
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 
 	md_wakeup_thread(mddev->thread);
@@ -254,9 +255,9 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	if (!uptodate)
-		md_error(r10_bio->mddev, conf->mirrors[dev].rdev);
-	else
+	update_head_pos(slot, r10_bio);
+
+	if (uptodate) {
 		/*
 		 * Set R10BIO_Uptodate in our master bio, so that
 		 * we will return a good error code to the higher
@@ -267,15 +268,8 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int
 		 * wait for the 'master' bio.
 		 */
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
-
-	update_head_pos(slot, r10_bio);
-
-	/*
-	 * we have only one bio on the read side
-	 */
-	if (uptodate)
 		raid_end_bio_io(r10_bio);
-	else {
+	} else {
 		/*
 		 * oops, read error:
 		 */
@@ -714,6 +708,33 @@ static void allow_barrier(conf_t *conf)
 	wake_up(&conf->wait_barrier);
 }
 
+static void freeze_array(conf_t *conf)
+{
+	/* stop syncio and normal IO and wait for everything to
+	 * go quite.
+	 * We increment barrier and nr_waiting, and then
+	 * wait until barrier+nr_pending match nr_queued+2
+	 */
+	spin_lock_irq(&conf->resync_lock);
+	conf->barrier++;
+	conf->nr_waiting++;
+	wait_event_lock_irq(conf->wait_barrier,
+			    conf->barrier+conf->nr_pending == conf->nr_queued+2,
+			    conf->resync_lock,
+			    raid10_unplug(conf->mddev->queue));
+	spin_unlock_irq(&conf->resync_lock);
+}
+
+static void unfreeze_array(conf_t *conf)
+{
+	/* reverse the effect of the freeze */
+	spin_lock_irq(&conf->resync_lock);
+	conf->barrier--;
+	conf->nr_waiting--;
+	wake_up(&conf->wait_barrier);
+	spin_unlock_irq(&conf->resync_lock);
+}
+
 static int make_request(request_queue_t *q, struct bio * bio)
 {
 	mddev_t *mddev = q->queuedata;
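A hedged, user-space model of the quiesce test freeze_array() sleeps on may help here. All names below (counters_t, io_start, io_retry, is_frozen) are invented for illustration; in the driver the counters live in conf_t, are updated under resync_lock, and are waited on via wait_event_lock_irq() rather than polled. The reading of the "+2" (one for the freeze's own barrier bump, one for the r10_bio that raid10d has already taken off the retry list) is an interpretation that assumes no resync is raising the barrier at the same time.

#include <stdio.h>

typedef struct {
	int barrier;	/* raised by resync or by freeze_array()       */
	int nr_pending;	/* r10_bios issued to the array, not retired   */
	int nr_waiting;	/* callers sleeping in wait_barrier()          */
	int nr_queued;	/* r10_bios parked on conf->retry_list         */
} counters_t;

static void io_start(counters_t *c) { c->nr_pending++; }	/* wait_barrier()     */
static void io_retry(counters_t *c) { c->nr_queued++;  }	/* reschedule_retry() */

/*
 * The condition freeze_array() waits for after doing barrier++ and
 * nr_waiting++.  With no resync running it reads as: every pending
 * r10_bio is either on the retry list or is the one request raid10d
 * is currently repairing -- that request plus the barrier bump make
 * up the "+2".
 */
static int is_frozen(const counters_t *c)
{
	return c->barrier + c->nr_pending == c->nr_queued + 2;
}

int main(void)
{
	counters_t c = { 0, 0, 0, 0 };

	io_start(&c);	/* request A: failed, raid10d took it off retry_list */
	io_start(&c);	/* request B: failed ...                             */
	io_retry(&c);	/* ... and still parked on retry_list                */
	io_start(&c);	/* request C: still in flight on some device         */

	c.barrier++;	/* freeze_array() begins */
	c.nr_waiting++;
	printf("quiesced: %s\n", is_frozen(&c) ? "yes" : "no");	/* no  */

	c.nr_pending--;	/* request C completes (allow_barrier())             */
	printf("quiesced: %s\n", is_frozen(&c) ? "yes" : "no");	/* yes */
	return 0;
}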
@@ -1338,6 +1359,7 @@ static void raid10d(mddev_t *mddev)
 			break;
 		r10_bio = list_entry(head->prev, r10bio_t, retry_list);
 		list_del(head->prev);
+		conf->nr_queued--;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 
 		mddev = r10_bio->mddev;
@@ -1350,6 +1372,78 @@ static void raid10d(mddev_t *mddev)
 			unplug = 1;
 		} else {
 			int mirror;
+			/* we got a read error. Maybe the drive is bad. Maybe just
+			 * the block and we can fix it.
+			 * We freeze all other IO, and try reading the block from
+			 * other devices. When we find one, we re-write
+			 * and check it that fixes the read error.
+			 * This is all done synchronously while the array is
+			 * frozen.
+			 */
+			int sect = 0; /* Offset from r10_bio->sector */
+			int sectors = r10_bio->sectors;
+			freeze_array(conf);
+			if (mddev->ro == 0) while(sectors) {
+				int s = sectors;
+				int sl = r10_bio->read_slot;
+				int success = 0;
+
+				if (s > (PAGE_SIZE>>9))
+					s = PAGE_SIZE >> 9;
+
+				do {
+					int d = r10_bio->devs[sl].devnum;
+					rdev = conf->mirrors[d].rdev;
+					if (rdev &&
+					    test_bit(In_sync, &rdev->flags) &&
+					    sync_page_io(rdev->bdev,
+							 r10_bio->devs[sl].addr +
+							 sect + rdev->data_offset,
+							 s<<9,
+							 conf->tmppage, READ))
+						success = 1;
+					else {
+						sl++;
+						if (sl == conf->copies)
+							sl = 0;
+					}
+				} while (!success && sl != r10_bio->read_slot);
+
+				if (success) {
+					/* write it back and re-read */
+					while (sl != r10_bio->read_slot) {
+						int d;
+						if (sl==0)
+							sl = conf->copies;
+						sl--;
+						d = r10_bio->devs[sl].devnum;
+						rdev = conf->mirrors[d].rdev;
+						if (rdev &&
+						    test_bit(In_sync, &rdev->flags)) {
+							if (sync_page_io(rdev->bdev,
+									 r10_bio->devs[sl].addr +
+									 sect + rdev->data_offset,
+									 s<<9, conf->tmppage, WRITE) == 0 ||
+							    sync_page_io(rdev->bdev,
+									 r10_bio->devs[sl].addr +
+									 sect + rdev->data_offset,
+									 s<<9, conf->tmppage, READ) == 0) {
+								/* Well, this device is dead */
+								md_error(mddev, rdev);
+							}
+						}
+					}
+				} else {
+					/* Cannot read from anywhere -- bye bye array */
+					md_error(mddev, conf->mirrors[r10_bio->devs[r10_bio->read_slot].devnum].rdev);
+					break;
+				}
+				sectors -= s;
+				sect += s;
+			}
+
+			unfreeze_array(conf);
+
 			bio = r10_bio->devs[r10_bio->read_slot].bio;
 			r10_bio->devs[r10_bio->read_slot].bio = NULL;
 			bio_put(bio);
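The comment block at the top of that hunk describes the recovery strategy in prose; a minimal, self-contained sketch of the same walk, with toy in-memory copies standing in for the mirror devices, may make the control flow easier to follow. Everything here (NCOPIES, read_copy(), write_copy(), the repairable "unreadable" flag) is invented for illustration; the driver works in PAGE_SIZE-sized slices through conf->tmppage with sync_page_io() and calls md_error() when a rewrite or verify fails.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define NCOPIES 3		/* stands in for conf->copies */
#define CHUNK   16		/* stands in for one PAGE_SIZE>>9 slice */

static char copies[NCOPIES][CHUNK];	/* one toy "block" per mirror */
static bool unreadable[NCOPIES];	/* simulated media read error */

static bool read_copy(int c, char *buf)
{
	if (unreadable[c])
		return false;		/* like sync_page_io(..., READ) failing */
	memcpy(buf, copies[c], CHUNK);
	return true;
}

static bool write_copy(int c, const char *buf)
{
	memcpy(copies[c], buf, CHUNK);
	unreadable[c] = false;		/* a successful rewrite repairs the block */
	return true;
}

/* Mirror the raid10d() loop: search forward for a readable copy, then walk
 * back over the copies that failed, rewriting and re-reading each of them
 * (including the slot the original read used). */
static bool fix_chunk(int read_slot)
{
	char page[CHUNK];
	int sl = read_slot;
	bool success = false;

	do {				/* look for any copy that still reads */
		if (read_copy(sl, page)) {
			success = true;
			break;
		}
		sl = (sl + 1) % NCOPIES;
	} while (sl != read_slot);

	if (!success)
		return false;		/* nothing readable: the array is lost */

	while (sl != read_slot) {	/* rewrite and verify the failing copies */
		sl = (sl - 1 + NCOPIES) % NCOPIES;
		if (!write_copy(sl, page) || !read_copy(sl, page))
			printf("copy %d would be failed via md_error()\n", sl);
	}
	return true;
}

int main(void)
{
	for (int c = 0; c < NCOPIES; c++)
		snprintf(copies[c], CHUNK, "data");
	unreadable[0] = true;		/* the slot the original read used */

	printf("repair %s; copy 0 now %sreadable\n",
	       fix_chunk(0) ? "succeeded" : "failed",
	       unreadable[0] ? "un" : "");
	return 0;
}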
@@ -1793,22 +1887,24 @@ static int run(mddev_t *mddev)
 	 * bookkeeping area. [whatever we allocate in run(),
 	 * should be freed in stop()]
 	 */
-	conf = kmalloc(sizeof(conf_t), GFP_KERNEL);
+	conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
 	mddev->private = conf;
 	if (!conf) {
 		printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
 			mdname(mddev));
 		goto out;
 	}
-	memset(conf, 0, sizeof(*conf));
-	conf->mirrors = kmalloc(sizeof(struct mirror_info)*mddev->raid_disks,
+	conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
 				GFP_KERNEL);
 	if (!conf->mirrors) {
 		printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
 			mdname(mddev));
 		goto out_free_conf;
 	}
-	memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks);
+
+	conf->tmppage = alloc_page(GFP_KERNEL);
+	if (!conf->tmppage)
+		goto out_free_conf;
 
 	conf->near_copies = nc;
 	conf->far_copies = fc;
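On the allocation change in that hunk: kzalloc() is kmalloc() plus zeroing, so dropping the two explicit memset() calls is behaviour-preserving. A simplified, hedged equivalent (not the actual mm/ implementation) is:

/* Simplified stand-in for kzalloc(); the real helper lives in the mm
 * code and does more checking. */
static inline void *kzalloc_equiv(size_t size, gfp_t flags)
{
	void *ret = kmalloc(size, flags);
	if (ret)
		memset(ret, 0, size);
	return ret;
}

The newly allocated conf->tmppage is the scratch page the recovery loop in raid10d() passes to sync_page_io(); the next hunk releases it with put_page() on the out_free_conf error path.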
@@ -1918,6 +2014,7 @@ static int run(mddev_t *mddev)
 out_free_conf:
 	if (conf->r10bio_pool)
 		mempool_destroy(conf->r10bio_pool);
+	put_page(conf->tmppage);
 	kfree(conf->mirrors);
 	kfree(conf);
 	mddev->private = NULL;
diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h
index b660cbf628d8..dfa528385e3f 100644
--- a/include/linux/raid/raid10.h
+++ b/include/linux/raid/raid10.h
@@ -42,6 +42,7 @@ struct r10_private_data_s {
 	spinlock_t		resync_lock;
 	int nr_pending;
 	int nr_waiting;
+	int nr_queued;
 	int barrier;
 	sector_t		next_resync;
 	int			fullsync;  /* set to 1 if a full sync is needed,
@@ -53,6 +54,7 @@ struct r10_private_data_s {
 
 	mempool_t *r10bio_pool;
 	mempool_t *r10buf_pool;
+	struct page		*tmppage;
 };
 
 typedef struct r10_private_data_s conf_t;
