-rw-r--r--  drivers/md/md.c             |   1
-rw-r--r--  drivers/md/raid1.c          | 115
-rw-r--r--  include/linux/raid/raid1.h  |   3
3 files changed, 109 insertions(+), 10 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 64e7da3701a5..1364a1c97e6f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -461,6 +461,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size,
 	bio_put(bio);
 	return ret;
 }
+EXPORT_SYMBOL(sync_page_io);
 
 static int read_disk_sb(mdk_rdev_t * rdev, int size)
 {
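
The export above exists so that the raid1 recovery path added below can call sync_page_io() directly: a blocking, page-at-a-time read or write against a single member device, with the result consumed as a simple success flag. As a loose userspace analogy only (this is not the kernel helper; the name and file-descriptor plumbing are assumptions for illustration), the call pattern resembles:

#include <stdbool.h>
#include <unistd.h>

#define SECTOR_SHIFT 9

/* Userspace stand-in for the sync_page_io(bdev, sector, size, page, rw)
 * pattern used later in this patch: one blocking transfer of `size` bytes
 * at `sector`, reporting success as a boolean. */
static bool sync_page_io_model(int fd, unsigned long long sector,
                               size_t size, void *page, bool write)
{
        off_t off = (off_t)(sector << SECTOR_SHIFT);
        ssize_t n = write ? pwrite(fd, page, size, off)
                          : pread(fd, page, size, off);
        return n == (ssize_t)size;
}
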
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c618015f07f6..b3856db8d6c2 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -191,6 +191,7 @@ static void reschedule_retry(r1bio_t *r1_bio)
 
 	spin_lock_irqsave(&conf->device_lock, flags);
 	list_add(&r1_bio->retry_list, &conf->retry_list);
+	conf->nr_queued ++;
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 
 	wake_up(&conf->wait_barrier);
@@ -245,9 +246,9 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	if (!uptodate)
-		md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
-	else
+	update_head_pos(mirror, r1_bio);
+
+	if (uptodate || conf->working_disks <= 1) {
 		/*
 		 * Set R1BIO_Uptodate in our master bio, so that
 		 * we will return a good error code for to the higher
@@ -259,14 +260,8 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int
 		 */
 		set_bit(R1BIO_Uptodate, &r1_bio->state);
 
-	update_head_pos(mirror, r1_bio);
-
-	/*
-	 * we have only one bio on the read side
-	 */
-	if (uptodate)
 		raid_end_bio_io(r1_bio);
-	else {
+	} else {
 		/*
 		 * oops, read error:
 		 */
@@ -653,6 +648,32 @@ static void allow_barrier(conf_t *conf)
 	wake_up(&conf->wait_barrier);
 }
 
+static void freeze_array(conf_t *conf)
+{
+	/* stop syncio and normal IO and wait for everything to
+	 * go quiet.
+	 * We increment barrier and nr_waiting, and then
+	 * wait until barrier+nr_pending match nr_queued+2
+	 */
+	spin_lock_irq(&conf->resync_lock);
+	conf->barrier++;
+	conf->nr_waiting++;
+	wait_event_lock_irq(conf->wait_barrier,
+			    conf->barrier+conf->nr_pending == conf->nr_queued+2,
+			    conf->resync_lock,
+			    raid1_unplug(conf->mddev->queue));
+	spin_unlock_irq(&conf->resync_lock);
+}
+static void unfreeze_array(conf_t *conf)
+{
+	/* reverse the effect of the freeze */
+	spin_lock_irq(&conf->resync_lock);
+	conf->barrier--;
+	conf->nr_waiting--;
+	wake_up(&conf->wait_barrier);
+	spin_unlock_irq(&conf->resync_lock);
+}
+
 
 /* duplicate the data pages for behind I/O */
 static struct page **alloc_behind_pages(struct bio *bio)
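
For context on the new helpers above: freeze_array() raises the resync barrier and then sleeps on wait_barrier until barrier + nr_pending == nr_queued + 2, i.e. until every other request has either completed or been parked on the retry list. The "+2" plausibly covers the barrier just raised plus the one request raid1d itself is still holding; that reading is an inference, not something the patch states. A minimal userspace model of the accounting, using pthreads in place of resync_lock and wait_event_lock_irq, might look like this:

#include <pthread.h>

/* Minimal model (assumption: pthreads standing in for the kernel's
 * resync_lock plus wait_event_lock_irq) of the counters that
 * freeze_array()/unfreeze_array() manipulate in this patch. */
struct conf_model {
        pthread_mutex_t resync_lock;
        pthread_cond_t  wait_barrier;
        int barrier;     /* resync/freeze barriers currently raised */
        int nr_pending;  /* normal I/O currently in flight */
        int nr_waiting;  /* requests waiting for the barrier to drop */
        int nr_queued;   /* failed requests parked on the retry list */
};

static void freeze_array_model(struct conf_model *conf)
{
        /* Stop sync and normal I/O and wait for everything to go quiet:
         * raise the barrier, then wait until the only outstanding request
         * is the one the caller itself is handling.  The "+2" mirrors the
         * patch: one for the barrier just raised, one for that request.
         * (Paths that change nr_pending or nr_queued would broadcast on
         * wait_barrier, just as the kernel code calls wake_up.) */
        pthread_mutex_lock(&conf->resync_lock);
        conf->barrier++;
        conf->nr_waiting++;
        while (conf->barrier + conf->nr_pending != conf->nr_queued + 2)
                pthread_cond_wait(&conf->wait_barrier, &conf->resync_lock);
        pthread_mutex_unlock(&conf->resync_lock);
}

static void unfreeze_array_model(struct conf_model *conf)
{
        /* Reverse the effect of the freeze and wake up any waiters. */
        pthread_mutex_lock(&conf->resync_lock);
        conf->barrier--;
        conf->nr_waiting--;
        pthread_cond_broadcast(&conf->wait_barrier);
        pthread_mutex_unlock(&conf->resync_lock);
}
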
@@ -1196,6 +1217,7 @@ static void raid1d(mddev_t *mddev)
 			break;
 		r1_bio = list_entry(head->prev, r1bio_t, retry_list);
 		list_del(head->prev);
+		conf->nr_queued--;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 
 		mddev = r1_bio->mddev;
@@ -1235,6 +1257,74 @@ static void raid1d(mddev_t *mddev)
 			}
 		} else {
 			int disk;
+
+			/* we got a read error.  Maybe the drive is bad.  Maybe just
+			 * the block and we can fix it.
+			 * We freeze all other IO, and try reading the block from
+			 * other devices.  When we find one, we re-write it and
+			 * re-read to check that this fixes the read error.
+			 * This is all done synchronously while the array is
+			 * frozen.
+			 */
+			sector_t sect = r1_bio->sector;
+			int sectors = r1_bio->sectors;
+			freeze_array(conf);
+			while(sectors) {
+				int s = sectors;
+				int d = r1_bio->read_disk;
+				int success = 0;
+
+				if (s > (PAGE_SIZE>>9))
+					s = PAGE_SIZE >> 9;
+
+				do {
+					rdev = conf->mirrors[d].rdev;
+					if (rdev &&
+					    test_bit(In_sync, &rdev->flags) &&
+					    sync_page_io(rdev->bdev,
+							 sect + rdev->data_offset,
+							 s<<9,
+							 conf->tmppage, READ))
+						success = 1;
+					else {
+						d++;
+						if (d == conf->raid_disks)
+							d = 0;
+					}
+				} while (!success && d != r1_bio->read_disk);
+
+				if (success) {
+					/* write it back and re-read */
+					while (d != r1_bio->read_disk) {
+						if (d==0)
+							d = conf->raid_disks;
+						d--;
+						rdev = conf->mirrors[d].rdev;
+						if (rdev &&
+						    test_bit(In_sync, &rdev->flags)) {
+							if (sync_page_io(rdev->bdev,
+									 sect + rdev->data_offset,
+									 s<<9, conf->tmppage, WRITE) == 0 ||
+							    sync_page_io(rdev->bdev,
+									 sect + rdev->data_offset,
+									 s<<9, conf->tmppage, READ) == 0) {
+								/* Well, this device is dead */
+								md_error(mddev, rdev);
+							}
+						}
+					}
+				} else {
+					/* Cannot read from anywhere -- bye bye array */
+					md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+					break;
+				}
+				sectors -= s;
+				sect += s;
+			}
+
+
+			unfreeze_array(conf);
+
 			bio = r1_bio->bios[r1_bio->read_disk];
 			if ((disk=read_balance(conf, r1_bio)) == -1) {
 				printk(KERN_ALERT "raid1: %s: unrecoverable I/O"
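
The recovery loop above works through the failed range in page-sized chunks: for each chunk it round-robins over the in-sync mirrors until one read succeeds, then walks back over the mirrors it skipped, rewriting and re-reading the chunk so a transient bad block gets repaired (and failing any device that cannot take the rewrite). If no mirror can supply the chunk, the reporting disk is failed and the loop gives up. A stripped-down userspace model of that control flow, with the I/O helpers reduced to trivial stubs (all names here are hypothetical, not part of the patch), could look like this:

#include <stdbool.h>

#define MODEL_CHUNK_SECTORS 8   /* stands in for PAGE_SIZE >> 9 */

/* Trivial stand-ins so the sketch compiles; a real test harness would
 * model per-disk sector state and a shared scratch page here. */
static bool model_disk_in_sync(int disk) { (void)disk; return true; }
static bool model_read(int disk, unsigned long long sect, int sectors)
{ (void)disk; (void)sect; (void)sectors; return true; }
static bool model_write(int disk, unsigned long long sect, int sectors)
{ (void)disk; (void)sect; (void)sectors; return true; }
static void model_fail_disk(int disk) { (void)disk; }   /* md_error() analogue */

/* Returns true if the whole range could be recovered from some mirror. */
static bool fix_read_error_model(int raid_disks, int read_disk,
                                 unsigned long long sect, int sectors)
{
        while (sectors) {
                int s = sectors > MODEL_CHUNK_SECTORS ? MODEL_CHUNK_SECTORS
                                                      : sectors;
                int d = read_disk;
                bool success = false;

                /* Try every in-sync mirror, starting with the one that
                 * reported the error, until one read succeeds. */
                do {
                        if (model_disk_in_sync(d) && model_read(d, sect, s))
                                success = true;
                        else if (++d == raid_disks)
                                d = 0;
                } while (!success && d != read_disk);

                if (!success)
                        return false;   /* no mirror has this chunk */

                /* Walk back over the mirrors tried before the successful
                 * one: rewrite the chunk just read, then re-read it; a
                 * mirror that fails either step is kicked out of the array. */
                while (d != read_disk) {
                        if (d == 0)
                                d = raid_disks;
                        d--;
                        if (model_disk_in_sync(d) &&
                            (!model_write(d, sect, s) || !model_read(d, sect, s)))
                                model_fail_disk(d);
                }

                sectors -= s;
                sect += s;
        }
        return true;
}

int main(void)
{
        /* recover a 16-sector range that disk 0 failed to read */
        return fix_read_error_model(3, 0, 1024, 16) ? 0 : 1;
}
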
@@ -1529,6 +1619,10 @@ static int run(mddev_t *mddev)
 
 	memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks);
 
+	conf->tmppage = alloc_page(GFP_KERNEL);
+	if (!conf->tmppage)
+		goto out_no_mem;
+
 	conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
 	if (!conf->poolinfo)
 		goto out_no_mem;
@@ -1635,6 +1729,7 @@ out_free_conf:
 	if (conf->r1bio_pool)
 		mempool_destroy(conf->r1bio_pool);
 	kfree(conf->mirrors);
+	__free_page(conf->tmppage);
 	kfree(conf->poolinfo);
 	kfree(conf);
 	mddev->private = NULL;
diff --git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h
index c55674252533..cbe4238d3f9f 100644
--- a/include/linux/raid/raid1.h
+++ b/include/linux/raid/raid1.h
@@ -46,6 +46,7 @@ struct r1_private_data_s {
 	spinlock_t		resync_lock;
 	int			nr_pending;
 	int			nr_waiting;
+	int			nr_queued;
 	int			barrier;
 	sector_t		next_resync;
 	int			fullsync;  /* set to 1 if a full sync is needed,
@@ -57,6 +58,8 @@ struct r1_private_data_s {
 
 	struct pool_info	*poolinfo;
 
+	struct page		*tmppage;
+
 	mempool_t *r1bio_pool;
 	mempool_t *r1buf_pool;
 };