diff options
-rw-r--r-- | drivers/md/raid10.c | 127 | ||||
-rw-r--r-- | include/linux/raid/raid10.h | 2 |
2 files changed, 114 insertions, 15 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1fa70c34b7d2..64bb4ddc6798 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -209,6 +209,7 @@ static void reschedule_retry(r10bio_t *r10_bio) | |||
209 | 209 | ||
210 | spin_lock_irqsave(&conf->device_lock, flags); | 210 | spin_lock_irqsave(&conf->device_lock, flags); |
211 | list_add(&r10_bio->retry_list, &conf->retry_list); | 211 | list_add(&r10_bio->retry_list, &conf->retry_list); |
212 | conf->nr_queued ++; | ||
212 | spin_unlock_irqrestore(&conf->device_lock, flags); | 213 | spin_unlock_irqrestore(&conf->device_lock, flags); |
213 | 214 | ||
214 | md_wakeup_thread(mddev->thread); | 215 | md_wakeup_thread(mddev->thread); |
@@ -254,9 +255,9 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int | |||
254 | /* | 255 | /* |
255 | * this branch is our 'one mirror IO has finished' event handler: | 256 | * this branch is our 'one mirror IO has finished' event handler: |
256 | */ | 257 | */ |
257 | if (!uptodate) | 258 | update_head_pos(slot, r10_bio); |
258 | md_error(r10_bio->mddev, conf->mirrors[dev].rdev); | 259 | |
259 | else | 260 | if (uptodate) { |
260 | /* | 261 | /* |
261 | * Set R10BIO_Uptodate in our master bio, so that | 262 | * Set R10BIO_Uptodate in our master bio, so that |
262 | * we will return a good error code to the higher | 263 | * we will return a good error code to the higher |
@@ -267,15 +268,8 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int | |||
267 | * wait for the 'master' bio. | 268 | * wait for the 'master' bio. |
268 | */ | 269 | */ |
269 | set_bit(R10BIO_Uptodate, &r10_bio->state); | 270 | set_bit(R10BIO_Uptodate, &r10_bio->state); |
270 | |||
271 | update_head_pos(slot, r10_bio); | ||
272 | |||
273 | /* | ||
274 | * we have only one bio on the read side | ||
275 | */ | ||
276 | if (uptodate) | ||
277 | raid_end_bio_io(r10_bio); | 271 | raid_end_bio_io(r10_bio); |
278 | else { | 272 | } else { |
279 | /* | 273 | /* |
280 | * oops, read error: | 274 | * oops, read error: |
281 | */ | 275 | */ |
@@ -714,6 +708,33 @@ static void allow_barrier(conf_t *conf) | |||
714 | wake_up(&conf->wait_barrier); | 708 | wake_up(&conf->wait_barrier); |
715 | } | 709 | } |
716 | 710 | ||
711 | static void freeze_array(conf_t *conf) | ||
712 | { | ||
713 | /* stop syncio and normal IO and wait for everything to | ||
714 | * go quiet. | ||
715 | * We increment barrier and nr_waiting, and then | ||
716 | * wait until barrier+nr_pending match nr_queued+2 | ||
717 | */ | ||
718 | spin_lock_irq(&conf->resync_lock); | ||
719 | conf->barrier++; | ||
720 | conf->nr_waiting++; | ||
721 | wait_event_lock_irq(conf->wait_barrier, | ||
722 | conf->barrier+conf->nr_pending == conf->nr_queued+2, | ||
723 | conf->resync_lock, | ||
724 | raid10_unplug(conf->mddev->queue)); | ||
725 | spin_unlock_irq(&conf->resync_lock); | ||
726 | } | ||
727 | |||
728 | static void unfreeze_array(conf_t *conf) | ||
729 | { | ||
730 | /* reverse the effect of the freeze */ | ||
731 | spin_lock_irq(&conf->resync_lock); | ||
732 | conf->barrier--; | ||
733 | conf->nr_waiting--; | ||
734 | wake_up(&conf->wait_barrier); | ||
735 | spin_unlock_irq(&conf->resync_lock); | ||
736 | } | ||
737 | |||
717 | static int make_request(request_queue_t *q, struct bio * bio) | 738 | static int make_request(request_queue_t *q, struct bio * bio) |
718 | { | 739 | { |
719 | mddev_t *mddev = q->queuedata; | 740 | mddev_t *mddev = q->queuedata; |
@@ -1338,6 +1359,7 @@ static void raid10d(mddev_t *mddev) | |||
1338 | break; | 1359 | break; |
1339 | r10_bio = list_entry(head->prev, r10bio_t, retry_list); | 1360 | r10_bio = list_entry(head->prev, r10bio_t, retry_list); |
1340 | list_del(head->prev); | 1361 | list_del(head->prev); |
1362 | conf->nr_queued--; | ||
1341 | spin_unlock_irqrestore(&conf->device_lock, flags); | 1363 | spin_unlock_irqrestore(&conf->device_lock, flags); |
1342 | 1364 | ||
1343 | mddev = r10_bio->mddev; | 1365 | mddev = r10_bio->mddev; |
@@ -1350,6 +1372,78 @@ static void raid10d(mddev_t *mddev) | |||
1350 | unplug = 1; | 1372 | unplug = 1; |
1351 | } else { | 1373 | } else { |
1352 | int mirror; | 1374 | int mirror; |
1375 | /* we got a read error. Maybe the drive is bad. Maybe just | ||
1376 | * the block and we can fix it. | ||
1377 | * We freeze all other IO, and try reading the block from | ||
1378 | * other devices. When we find one, we re-write | ||
1379 | * and check if that fixes the read error. | ||
1380 | * This is all done synchronously while the array is | ||
1381 | * frozen. | ||
1382 | */ | ||
1383 | int sect = 0; /* Offset from r10_bio->sector */ | ||
1384 | int sectors = r10_bio->sectors; | ||
1385 | freeze_array(conf); | ||
1386 | if (mddev->ro == 0) while(sectors) { | ||
1387 | int s = sectors; | ||
1388 | int sl = r10_bio->read_slot; | ||
1389 | int success = 0; | ||
1390 | |||
1391 | if (s > (PAGE_SIZE>>9)) | ||
1392 | s = PAGE_SIZE >> 9; | ||
1393 | |||
1394 | do { | ||
1395 | int d = r10_bio->devs[sl].devnum; | ||
1396 | rdev = conf->mirrors[d].rdev; | ||
1397 | if (rdev && | ||
1398 | test_bit(In_sync, &rdev->flags) && | ||
1399 | sync_page_io(rdev->bdev, | ||
1400 | r10_bio->devs[sl].addr + | ||
1401 | sect + rdev->data_offset, | ||
1402 | s<<9, | ||
1403 | conf->tmppage, READ)) | ||
1404 | success = 1; | ||
1405 | else { | ||
1406 | sl++; | ||
1407 | if (sl == conf->copies) | ||
1408 | sl = 0; | ||
1409 | } | ||
1410 | } while (!success && sl != r10_bio->read_slot); | ||
1411 | |||
1412 | if (success) { | ||
1413 | /* write it back and re-read */ | ||
1414 | while (sl != r10_bio->read_slot) { | ||
1415 | int d; | ||
1416 | if (sl==0) | ||
1417 | sl = conf->copies; | ||
1418 | sl--; | ||
1419 | d = r10_bio->devs[sl].devnum; | ||
1420 | rdev = conf->mirrors[d].rdev; | ||
1421 | if (rdev && | ||
1422 | test_bit(In_sync, &rdev->flags)) { | ||
1423 | if (sync_page_io(rdev->bdev, | ||
1424 | r10_bio->devs[sl].addr + | ||
1425 | sect + rdev->data_offset, | ||
1426 | s<<9, conf->tmppage, WRITE) == 0 || | ||
1427 | sync_page_io(rdev->bdev, | ||
1428 | r10_bio->devs[sl].addr + | ||
1429 | sect + rdev->data_offset, | ||
1430 | s<<9, conf->tmppage, READ) == 0) { | ||
1431 | /* Well, this device is dead */ | ||
1432 | md_error(mddev, rdev); | ||
1433 | } | ||
1434 | } | ||
1435 | } | ||
1436 | } else { | ||
1437 | /* Cannot read from anywhere -- bye bye array */ | ||
1438 | md_error(mddev, conf->mirrors[r10_bio->devs[r10_bio->read_slot].devnum].rdev); | ||
1439 | break; | ||
1440 | } | ||
1441 | sectors -= s; | ||
1442 | sect += s; | ||
1443 | } | ||
1444 | |||
1445 | unfreeze_array(conf); | ||
1446 | |||
1353 | bio = r10_bio->devs[r10_bio->read_slot].bio; | 1447 | bio = r10_bio->devs[r10_bio->read_slot].bio; |
1354 | r10_bio->devs[r10_bio->read_slot].bio = NULL; | 1448 | r10_bio->devs[r10_bio->read_slot].bio = NULL; |
1355 | bio_put(bio); | 1449 | bio_put(bio); |
@@ -1793,22 +1887,24 @@ static int run(mddev_t *mddev) | |||
1793 | * bookkeeping area. [whatever we allocate in run(), | 1887 | * bookkeeping area. [whatever we allocate in run(), |
1794 | * should be freed in stop()] | 1888 | * should be freed in stop()] |
1795 | */ | 1889 | */ |
1796 | conf = kmalloc(sizeof(conf_t), GFP_KERNEL); | 1890 | conf = kzalloc(sizeof(conf_t), GFP_KERNEL); |
1797 | mddev->private = conf; | 1891 | mddev->private = conf; |
1798 | if (!conf) { | 1892 | if (!conf) { |
1799 | printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", | 1893 | printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", |
1800 | mdname(mddev)); | 1894 | mdname(mddev)); |
1801 | goto out; | 1895 | goto out; |
1802 | } | 1896 | } |
1803 | memset(conf, 0, sizeof(*conf)); | 1897 | conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, |
1804 | conf->mirrors = kmalloc(sizeof(struct mirror_info)*mddev->raid_disks, | ||
1805 | GFP_KERNEL); | 1898 | GFP_KERNEL); |
1806 | if (!conf->mirrors) { | 1899 | if (!conf->mirrors) { |
1807 | printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", | 1900 | printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", |
1808 | mdname(mddev)); | 1901 | mdname(mddev)); |
1809 | goto out_free_conf; | 1902 | goto out_free_conf; |
1810 | } | 1903 | } |
1811 | memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks); | 1904 | |
1905 | conf->tmppage = alloc_page(GFP_KERNEL); | ||
1906 | if (!conf->tmppage) | ||
1907 | goto out_free_conf; | ||
1812 | 1908 | ||
1813 | conf->near_copies = nc; | 1909 | conf->near_copies = nc; |
1814 | conf->far_copies = fc; | 1910 | conf->far_copies = fc; |
@@ -1918,6 +2014,7 @@ static int run(mddev_t *mddev) | |||
1918 | out_free_conf: | 2014 | out_free_conf: |
1919 | if (conf->r10bio_pool) | 2015 | if (conf->r10bio_pool) |
1920 | mempool_destroy(conf->r10bio_pool); | 2016 | mempool_destroy(conf->r10bio_pool); |
2017 | put_page(conf->tmppage); | ||
1921 | kfree(conf->mirrors); | 2018 | kfree(conf->mirrors); |
1922 | kfree(conf); | 2019 | kfree(conf); |
1923 | mddev->private = NULL; | 2020 | mddev->private = NULL; |
diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h index b660cbf628d8..dfa528385e3f 100644 --- a/include/linux/raid/raid10.h +++ b/include/linux/raid/raid10.h | |||
@@ -42,6 +42,7 @@ struct r10_private_data_s { | |||
42 | spinlock_t resync_lock; | 42 | spinlock_t resync_lock; |
43 | int nr_pending; | 43 | int nr_pending; |
44 | int nr_waiting; | 44 | int nr_waiting; |
45 | int nr_queued; | ||
45 | int barrier; | 46 | int barrier; |
46 | sector_t next_resync; | 47 | sector_t next_resync; |
47 | int fullsync; /* set to 1 if a full sync is needed, | 48 | int fullsync; /* set to 1 if a full sync is needed, |
@@ -53,6 +54,7 @@ struct r10_private_data_s { | |||
53 | 54 | ||
54 | mempool_t *r10bio_pool; | 55 | mempool_t *r10bio_pool; |
55 | mempool_t *r10buf_pool; | 56 | mempool_t *r10buf_pool; |
57 | struct page *tmppage; | ||
56 | }; | 58 | }; |
57 | 59 | ||
58 | typedef struct r10_private_data_s conf_t; | 60 | typedef struct r10_private_data_s conf_t; |