Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/dm-raid1.c	101
1 file changed, 90 insertions, 11 deletions
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9978b9f07fe9..ec6d675bf766 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -146,6 +146,7 @@ struct mirror_set {
 	region_t nr_regions;
 	int in_sync;
 	int log_failure;
+	atomic_t suspend;
 
 	atomic_t default_mirror;	/* Default mirror */
 
@@ -372,6 +373,16 @@ static void complete_resync_work(struct region *reg, int success)
 	struct region_hash *rh = reg->rh;
 
 	rh->log->type->set_region_sync(rh->log, reg->key, success);
+
+	/*
+	 * Dispatch the bios before we call 'wake_up_all'.
+	 * This is important because if we are suspending,
+	 * we want to know that recovery is complete and
+	 * the work queue is flushed. If we wake_up_all
+	 * before we dispatch_bios (queue bios and call wake()),
+	 * then we risk suspending before the work queue
+	 * has been properly flushed.
+	 */
 	dispatch_bios(rh->ms, &reg->delayed_bios);
 	if (atomic_dec_and_test(&rh->recovery_in_flight))
 		wake_up_all(&_kmirrord_recovery_stopped);
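
Taken together with the mirror_presuspend() hunk further down, this ordering forms a small handshake between resync completion and suspend. Below is a condensed sketch of the two paths, assembled from the lines in this patch (not a standalone compilable unit):

    /* resync completion (complete_resync_work): queue the delayed bios first... */
    dispatch_bios(rh->ms, &reg->delayed_bios);         /* queues bios and wakes the worker */
    if (atomic_dec_and_test(&rh->recovery_in_flight))
            wake_up_all(&_kmirrord_recovery_stopped);  /* ...then announce recovery is done */

    /* suspend path (mirror_presuspend, added later in this patch) */
    rh_stop_recovery(&ms->rh);
    wait_event(_kmirrord_recovery_stopped,
               !atomic_read(&ms->rh.recovery_in_flight));
    flush_workqueue(ms->kmirrord_wq);                  /* drains the bios queued above */

If wake_up_all() came first, the presuspend path could get past wait_event() and flush an apparently empty work queue while the delayed bios were still waiting to be handed to the worker.
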
@@ -1069,11 +1080,13 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
 	/*
 	 * Dispatch io.
 	 */
-	if (unlikely(ms->log_failure))
+	if (unlikely(ms->log_failure)) {
+		spin_lock_irq(&ms->lock);
+		bio_list_merge(&ms->failures, &sync);
+		spin_unlock_irq(&ms->lock);
+	} else
 		while ((bio = bio_list_pop(&sync)))
-			bio_endio(bio, -EIO);
-	else while ((bio = bio_list_pop(&sync)))
-		do_write(ms, bio);
+			do_write(ms, bio);
 
 	while ((bio = bio_list_pop(&recover)))
 		rh_delay(&ms->rh, bio);
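
In other words, once the log has failed, sync writes are no longer completed with -EIO; they are parked on ms->failures and left for do_failures() to deal with. The failures list is also fed from the write-completion side (see the comment in the next hunk), so the merge is done under ms->lock with interrupts disabled. A minimal annotated sketch of the hand-off, condensed from the patched do_writes() (not standalone code):

    if (unlikely(ms->log_failure)) {
            /* the log cannot record region state, so hold the writes... */
            spin_lock_irq(&ms->lock);
            bio_list_merge(&ms->failures, &sync);      /* ...on the shared failures list */
            spin_unlock_irq(&ms->lock);
    } else
            while ((bio = bio_list_pop(&sync)))
                    do_write(ms, bio);                 /* normal mirrored write */
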
@@ -1091,8 +1104,46 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
 	if (!failures->head)
 		return;
 
-	while ((bio = bio_list_pop(failures)))
-		__bio_mark_nosync(ms, bio, bio->bi_size, 0);
+	if (!ms->log_failure) {
+		while ((bio = bio_list_pop(failures)))
+			__bio_mark_nosync(ms, bio, bio->bi_size, 0);
+		return;
+	}
+
+	/*
+	 * If the log has failed, unattempted writes are being
+	 * put on the failures list. We can't issue those writes
+	 * until a log has been marked, so we must store them.
+	 *
+	 * If a 'noflush' suspend is in progress, we can requeue
+	 * the I/O's to the core. This gives userspace a chance
+	 * to reconfigure the mirror, at which point the core
+	 * will reissue the writes. If the 'noflush' flag is
+	 * not set, we have no choice but to return errors.
+	 *
+	 * Some writes on the failures list may have been
+	 * submitted before the log failure and represent a
+	 * failure to write to one of the devices. It is ok
+	 * for us to treat them the same and requeue them
+	 * as well.
+	 */
+	if (dm_noflush_suspending(ms->ti)) {
+		while ((bio = bio_list_pop(failures)))
+			bio_endio(bio, DM_ENDIO_REQUEUE);
+		return;
+	}
+
+	if (atomic_read(&ms->suspend)) {
+		while ((bio = bio_list_pop(failures)))
+			bio_endio(bio, -EIO);
+		return;
+	}
+
+	spin_lock_irq(&ms->lock);
+	bio_list_merge(&ms->failures, failures);
+	spin_unlock_irq(&ms->lock);
+
+	wake(ms);
 }
 
 static void trigger_event(struct work_struct *work)
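
The resulting do_failures() is a four-way decision tree. The sketch below is the post-patch function reassembled from the hunk above, with one comment per branch added; it relies on the rest of dm-raid1.c and is not standalone:

    static void do_failures(struct mirror_set *ms, struct bio_list *failures)
    {
            struct bio *bio;

            if (!failures->head)
                    return;                 /* nothing to do */

            if (!ms->log_failure) {
                    /* log is alive: just mark the affected regions out-of-sync */
                    while ((bio = bio_list_pop(failures)))
                            __bio_mark_nosync(ms, bio, bio->bi_size, 0);
                    return;
            }

            if (dm_noflush_suspending(ms->ti)) {
                    /* noflush suspend in progress: push the bios back to the dm core */
                    while ((bio = bio_list_pop(failures)))
                            bio_endio(bio, DM_ENDIO_REQUEUE);
                    return;
            }

            if (atomic_read(&ms->suspend)) {
                    /* ordinary suspend: nothing left to try, error the bios */
                    while ((bio = bio_list_pop(failures)))
                            bio_endio(bio, -EIO);
                    return;
            }

            /* otherwise keep holding them and let the worker look again */
            spin_lock_irq(&ms->lock);
            bio_list_merge(&ms->failures, failures);
            spin_unlock_irq(&ms->lock);
            wake(ms);
    }

Only the last branch keeps the bios alive: they stay on ms->failures and the worker is woken again, so they are re-examined on each pass until the log is dealt with or the mirror is suspended.
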
@@ -1176,6 +1227,8 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
 	ms->nr_mirrors = nr_mirrors;
 	ms->nr_regions = dm_sector_div_up(ti->len, region_size);
 	ms->in_sync = 0;
+	ms->log_failure = 0;
+	atomic_set(&ms->suspend, 0);
 	atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
 
 	ms->io_client = dm_io_client_create(DM_IO_PAGES);
@@ -1511,26 +1564,51 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
 	return 0;
 }
 
-static void mirror_postsuspend(struct dm_target *ti)
+static void mirror_presuspend(struct dm_target *ti)
 {
 	struct mirror_set *ms = (struct mirror_set *) ti->private;
 	struct dirty_log *log = ms->rh.log;
 
+	atomic_set(&ms->suspend, 1);
+
+	/*
+	 * We must finish up all the work that we've
+	 * generated (i.e. recovery work).
+	 */
 	rh_stop_recovery(&ms->rh);
 
-	/* Wait for all I/O we generated to complete */
 	wait_event(_kmirrord_recovery_stopped,
 		   !atomic_read(&ms->rh.recovery_in_flight));
 
+	if (log->type->presuspend && log->type->presuspend(log))
+		/* FIXME: need better error handling */
+		DMWARN("log presuspend failed");
+
+	/*
+	 * Now that recovery is complete/stopped and the
+	 * delayed bios are queued, we need to wait for
+	 * the worker thread to complete. This way,
+	 * we know that all of our I/O has been pushed.
+	 */
+	flush_workqueue(ms->kmirrord_wq);
+}
+
+static void mirror_postsuspend(struct dm_target *ti)
+{
+	struct mirror_set *ms = ti->private;
+	struct dirty_log *log = ms->rh.log;
+
 	if (log->type->postsuspend && log->type->postsuspend(log))
 		/* FIXME: need better error handling */
-		DMWARN("log suspend failed");
+		DMWARN("log postsuspend failed");
 }
 
 static void mirror_resume(struct dm_target *ti)
 {
-	struct mirror_set *ms = (struct mirror_set *) ti->private;
+	struct mirror_set *ms = ti->private;
 	struct dirty_log *log = ms->rh.log;
+
+	atomic_set(&ms->suspend, 0);
 	if (log->type->resume && log->type->resume(log))
 		/* FIXME: need better error handling */
 		DMWARN("log resume failed");
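
For a suspend/resume cycle, the mirror target now participates at three points instead of one. Device-mapper invokes the hooks roughly in the order sketched below; the annotations are condensed from the functions in this hunk:

    mirror_presuspend(ti);   /* set ms->suspend, stop and drain recovery,
                                log presuspend, flush_workqueue(ms->kmirrord_wq) */
    /* ...dm core quiesces outstanding I/O... */
    mirror_postsuspend(ti);  /* log postsuspend only */
    /* ...userspace may reload the table, then resume... */
    mirror_resume(ti);       /* clear ms->suspend, log resume */

This is also why do_failures() can rely on atomic_read(&ms->suspend): the flag is raised before the work queue is flushed in presuspend, so any failures processed after that point know a plain (non-noflush) suspend is in progress.
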
@@ -1564,7 +1642,7 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
 		DMEMIT("%d", ms->nr_mirrors);
 		for (m = 0; m < ms->nr_mirrors; m++)
 			DMEMIT(" %s %llu", ms->mirror[m].dev->name,
-				(unsigned long long)ms->mirror[m].offset);
+			       (unsigned long long)ms->mirror[m].offset);
 
 		if (ms->features & DM_RAID1_HANDLE_ERRORS)
 			DMEMIT(" 1 handle_errors");
@@ -1581,6 +1659,7 @@ static struct target_type mirror_target = {
 	.dtr	 = mirror_dtr,
 	.map	 = mirror_map,
 	.end_io	 = mirror_end_io,
+	.presuspend = mirror_presuspend,
 	.postsuspend = mirror_postsuspend,
 	.resume	 = mirror_resume,
 	.status	 = mirror_status,