aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid10.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--drivers/md/raid10.c127
1 files changed, 112 insertions, 15 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 1fa70c34b7d2..64bb4ddc6798 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -209,6 +209,7 @@ static void reschedule_retry(r10bio_t *r10_bio)
209 209
210 spin_lock_irqsave(&conf->device_lock, flags); 210 spin_lock_irqsave(&conf->device_lock, flags);
211 list_add(&r10_bio->retry_list, &conf->retry_list); 211 list_add(&r10_bio->retry_list, &conf->retry_list);
212 conf->nr_queued ++;
212 spin_unlock_irqrestore(&conf->device_lock, flags); 213 spin_unlock_irqrestore(&conf->device_lock, flags);
213 214
214 md_wakeup_thread(mddev->thread); 215 md_wakeup_thread(mddev->thread);
@@ -254,9 +255,9 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int
254 /* 255 /*
255 * this branch is our 'one mirror IO has finished' event handler: 256 * this branch is our 'one mirror IO has finished' event handler:
256 */ 257 */
257 if (!uptodate) 258 update_head_pos(slot, r10_bio);
258 md_error(r10_bio->mddev, conf->mirrors[dev].rdev); 259
259 else 260 if (uptodate) {
260 /* 261 /*
261 * Set R10BIO_Uptodate in our master bio, so that 262 * Set R10BIO_Uptodate in our master bio, so that
262 * we will return a good error code to the higher 263 * we will return a good error code to the higher
@@ -267,15 +268,8 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int
267 * wait for the 'master' bio. 268 * wait for the 'master' bio.
268 */ 269 */
269 set_bit(R10BIO_Uptodate, &r10_bio->state); 270 set_bit(R10BIO_Uptodate, &r10_bio->state);
270
271 update_head_pos(slot, r10_bio);
272
273 /*
274 * we have only one bio on the read side
275 */
276 if (uptodate)
277 raid_end_bio_io(r10_bio); 271 raid_end_bio_io(r10_bio);
278 else { 272 } else {
279 /* 273 /*
280 * oops, read error: 274 * oops, read error:
281 */ 275 */
@@ -714,6 +708,33 @@ static void allow_barrier(conf_t *conf)
714 wake_up(&conf->wait_barrier); 708 wake_up(&conf->wait_barrier);
715} 709}
716 710
711static void freeze_array(conf_t *conf)
712{
713 /* stop syncio and normal IO and wait for everything to
714 * go quiet.
715 * We increment barrier and nr_waiting, and then
716 * wait until barrier+nr_pending match nr_queued+2
717 */
718 spin_lock_irq(&conf->resync_lock);
719 conf->barrier++;
720 conf->nr_waiting++;
721 wait_event_lock_irq(conf->wait_barrier,
722 conf->barrier+conf->nr_pending == conf->nr_queued+2,
723 conf->resync_lock,
724 raid10_unplug(conf->mddev->queue));
725 spin_unlock_irq(&conf->resync_lock);
726}
727
728static void unfreeze_array(conf_t *conf)
729{
730 /* reverse the effect of the freeze */
731 spin_lock_irq(&conf->resync_lock);
732 conf->barrier--;
733 conf->nr_waiting--;
734 wake_up(&conf->wait_barrier);
735 spin_unlock_irq(&conf->resync_lock);
736}
737
717static int make_request(request_queue_t *q, struct bio * bio) 738static int make_request(request_queue_t *q, struct bio * bio)
718{ 739{
719 mddev_t *mddev = q->queuedata; 740 mddev_t *mddev = q->queuedata;
@@ -1338,6 +1359,7 @@ static void raid10d(mddev_t *mddev)
1338 break; 1359 break;
1339 r10_bio = list_entry(head->prev, r10bio_t, retry_list); 1360 r10_bio = list_entry(head->prev, r10bio_t, retry_list);
1340 list_del(head->prev); 1361 list_del(head->prev);
1362 conf->nr_queued--;
1341 spin_unlock_irqrestore(&conf->device_lock, flags); 1363 spin_unlock_irqrestore(&conf->device_lock, flags);
1342 1364
1343 mddev = r10_bio->mddev; 1365 mddev = r10_bio->mddev;
@@ -1350,6 +1372,78 @@ static void raid10d(mddev_t *mddev)
1350 unplug = 1; 1372 unplug = 1;
1351 } else { 1373 } else {
1352 int mirror; 1374 int mirror;
1375 /* we got a read error. Maybe the drive is bad. Maybe just
1376 * the block and we can fix it.
1377 * We freeze all other IO, and try reading the block from
1378 * other devices. When we find one, we re-write
1379 * and check if that fixes the read error.
1380 * This is all done synchronously while the array is
1381 * frozen.
1382 */
1383 int sect = 0; /* Offset from r10_bio->sector */
1384 int sectors = r10_bio->sectors;
1385 freeze_array(conf);
1386 if (mddev->ro == 0) while(sectors) {
1387 int s = sectors;
1388 int sl = r10_bio->read_slot;
1389 int success = 0;
1390
1391 if (s > (PAGE_SIZE>>9))
1392 s = PAGE_SIZE >> 9;
1393
1394 do {
1395 int d = r10_bio->devs[sl].devnum;
1396 rdev = conf->mirrors[d].rdev;
1397 if (rdev &&
1398 test_bit(In_sync, &rdev->flags) &&
1399 sync_page_io(rdev->bdev,
1400 r10_bio->devs[sl].addr +
1401 sect + rdev->data_offset,
1402 s<<9,
1403 conf->tmppage, READ))
1404 success = 1;
1405 else {
1406 sl++;
1407 if (sl == conf->copies)
1408 sl = 0;
1409 }
1410 } while (!success && sl != r10_bio->read_slot);
1411
1412 if (success) {
1413 /* write it back and re-read */
1414 while (sl != r10_bio->read_slot) {
1415 int d;
1416 if (sl==0)
1417 sl = conf->copies;
1418 sl--;
1419 d = r10_bio->devs[sl].devnum;
1420 rdev = conf->mirrors[d].rdev;
1421 if (rdev &&
1422 test_bit(In_sync, &rdev->flags)) {
1423 if (sync_page_io(rdev->bdev,
1424 r10_bio->devs[sl].addr +
1425 sect + rdev->data_offset,
1426 s<<9, conf->tmppage, WRITE) == 0 ||
1427 sync_page_io(rdev->bdev,
1428 r10_bio->devs[sl].addr +
1429 sect + rdev->data_offset,
1430 s<<9, conf->tmppage, READ) == 0) {
1431 /* Well, this device is dead */
1432 md_error(mddev, rdev);
1433 }
1434 }
1435 }
1436 } else {
1437 /* Cannot read from anywhere -- bye bye array */
1438 md_error(mddev, conf->mirrors[r10_bio->devs[r10_bio->read_slot].devnum].rdev);
1439 break;
1440 }
1441 sectors -= s;
1442 sect += s;
1443 }
1444
1445 unfreeze_array(conf);
1446
1353 bio = r10_bio->devs[r10_bio->read_slot].bio; 1447 bio = r10_bio->devs[r10_bio->read_slot].bio;
1354 r10_bio->devs[r10_bio->read_slot].bio = NULL; 1448 r10_bio->devs[r10_bio->read_slot].bio = NULL;
1355 bio_put(bio); 1449 bio_put(bio);
@@ -1793,22 +1887,24 @@ static int run(mddev_t *mddev)
1793 * bookkeeping area. [whatever we allocate in run(), 1887 * bookkeeping area. [whatever we allocate in run(),
1794 * should be freed in stop()] 1888 * should be freed in stop()]
1795 */ 1889 */
1796 conf = kmalloc(sizeof(conf_t), GFP_KERNEL); 1890 conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
1797 mddev->private = conf; 1891 mddev->private = conf;
1798 if (!conf) { 1892 if (!conf) {
1799 printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", 1893 printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
1800 mdname(mddev)); 1894 mdname(mddev));
1801 goto out; 1895 goto out;
1802 } 1896 }
1803 memset(conf, 0, sizeof(*conf)); 1897 conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
1804 conf->mirrors = kmalloc(sizeof(struct mirror_info)*mddev->raid_disks,
1805 GFP_KERNEL); 1898 GFP_KERNEL);
1806 if (!conf->mirrors) { 1899 if (!conf->mirrors) {
1807 printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", 1900 printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
1808 mdname(mddev)); 1901 mdname(mddev));
1809 goto out_free_conf; 1902 goto out_free_conf;
1810 } 1903 }
1811 memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks); 1904
1905 conf->tmppage = alloc_page(GFP_KERNEL);
1906 if (!conf->tmppage)
1907 goto out_free_conf;
1812 1908
1813 conf->near_copies = nc; 1909 conf->near_copies = nc;
1814 conf->far_copies = fc; 1910 conf->far_copies = fc;
@@ -1918,6 +2014,7 @@ static int run(mddev_t *mddev)
1918out_free_conf: 2014out_free_conf:
1919 if (conf->r10bio_pool) 2015 if (conf->r10bio_pool)
1920 mempool_destroy(conf->r10bio_pool); 2016 mempool_destroy(conf->r10bio_pool);
2017 put_page(conf->tmppage);
1921 kfree(conf->mirrors); 2018 kfree(conf->mirrors);
1922 kfree(conf); 2019 kfree(conf);
1923 mddev->private = NULL; 2020 mddev->private = NULL;