Diffstat (limited to 'drivers')
-rw-r--r--	drivers/md/dm-raid1.c	101
1 file changed, 90 insertions, 11 deletions
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9978b9f07fe9..ec6d675bf766 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -146,6 +146,7 @@ struct mirror_set {
 	region_t nr_regions;
 	int in_sync;
 	int log_failure;
+	atomic_t suspend;
 
 	atomic_t default_mirror;	/* Default mirror */
 
@@ -372,6 +373,16 @@ static void complete_resync_work(struct region *reg, int success)
 	struct region_hash *rh = reg->rh;
 
 	rh->log->type->set_region_sync(rh->log, reg->key, success);
+
+	/*
+	 * Dispatch the bios before we call 'wake_up_all'.
+	 * This is important because if we are suspending,
+	 * we want to know that recovery is complete and
+	 * the work queue is flushed. If we wake_up_all
+	 * before we dispatch_bios (queue bios and call wake()),
+	 * then we risk suspending before the work queue
+	 * has been properly flushed.
+	 */
 	dispatch_bios(rh->ms, &reg->delayed_bios);
 	if (atomic_dec_and_test(&rh->recovery_in_flight))
 		wake_up_all(&_kmirrord_recovery_stopped);
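
The comment above encodes an ordering rule: queue the delayed bios on the worker before waking anyone sleeping on _kmirrord_recovery_stopped, or a suspender could flush the work queue before the bios land on it. A minimal userspace analogue of that rule, using pthreads in place of wait_event()/wake_up_all() (illustrative only, not kernel code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t recovery_stopped = PTHREAD_COND_INITIALIZER;
static int recovery_in_flight = 1;
static int queued_bios;	/* stand-in for work sitting on the worker's queue */

/* Recovery completion: make the deferred work visible, then wake. */
static void *complete_resync_work(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	queued_bios++;			/* dispatch_bios() */
	pthread_mutex_unlock(&lock);

	pthread_mutex_lock(&lock);
	recovery_in_flight--;		/* atomic_dec_and_test() */
	pthread_cond_broadcast(&recovery_stopped);	/* wake_up_all() */
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* Suspend path: wait for recovery, then "flush" the queue. */
static void *presuspend_waiter(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (recovery_in_flight)	/* wait_event() */
		pthread_cond_wait(&recovery_stopped, &lock);
	/*
	 * flush_workqueue() analogue: because the work was queued before
	 * the wakeup, this can never report 0. Reverse the two halves of
	 * complete_resync_work() and it can.
	 */
	printf("flush sees %d queued bio(s)\n", queued_bios);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t waiter, worker;

	pthread_create(&waiter, NULL, presuspend_waiter, NULL);
	pthread_create(&worker, NULL, complete_resync_work, NULL);
	pthread_join(worker, NULL);
	pthread_join(waiter, NULL);
	return 0;
}

Build with cc -pthread; the printf demonstrates the guarantee the kernel comment is defending.
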
@@ -1069,11 +1080,13 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
 	/*
 	 * Dispatch io.
 	 */
-	if (unlikely(ms->log_failure))
+	if (unlikely(ms->log_failure)) {
+		spin_lock_irq(&ms->lock);
+		bio_list_merge(&ms->failures, &sync);
+		spin_unlock_irq(&ms->lock);
+	} else
 		while ((bio = bio_list_pop(&sync)))
-			bio_endio(bio, -EIO);
-	else while ((bio = bio_list_pop(&sync)))
-		do_write(ms, bio);
+			do_write(ms, bio);
 
 	while ((bio = bio_list_pop(&recover)))
 		rh_delay(&ms->rh, bio);
@@ -1091,8 +1104,46 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
 	if (!failures->head)
 		return;
 
-	while ((bio = bio_list_pop(failures)))
-		__bio_mark_nosync(ms, bio, bio->bi_size, 0);
+	if (!ms->log_failure) {
+		while ((bio = bio_list_pop(failures)))
+			__bio_mark_nosync(ms, bio, bio->bi_size, 0);
+		return;
+	}
+
+	/*
+	 * If the log has failed, unattempted writes are being
+	 * put on the failures list. We can't issue those writes
+	 * until a log has been marked, so we must store them.
+	 *
+	 * If a 'noflush' suspend is in progress, we can requeue
+	 * the I/O's to the core. This gives userspace a chance
+	 * to reconfigure the mirror, at which point the core
+	 * will reissue the writes. If the 'noflush' flag is
+	 * not set, we have no choice but to return errors.
+	 *
+	 * Some writes on the failures list may have been
+	 * submitted before the log failure and represent a
+	 * failure to write to one of the devices. It is ok
+	 * for us to treat them the same and requeue them
+	 * as well.
+	 */
+	if (dm_noflush_suspending(ms->ti)) {
+		while ((bio = bio_list_pop(failures)))
+			bio_endio(bio, DM_ENDIO_REQUEUE);
+		return;
+	}
+
+	if (atomic_read(&ms->suspend)) {
+		while ((bio = bio_list_pop(failures)))
+			bio_endio(bio, -EIO);
+		return;
+	}
+
+	spin_lock_irq(&ms->lock);
+	bio_list_merge(&ms->failures, failures);
+	spin_unlock_irq(&ms->lock);
+
+	wake(ms);
 }
 
 static void trigger_event(struct work_struct *work)
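
Condensed, the new do_failures() is a four-way decision over each failed bio. A hypothetical userspace model of that ladder (the function and enum names here are invented for illustration; the three conditions map to ms->log_failure, dm_noflush_suspending() and ms->suspend):

#include <stdio.h>

enum outcome { MARK_NOSYNC, REQUEUE_TO_CORE, ERROR_EIO, HOLD_FOR_RETRY };

static enum outcome classify_failure(int log_failed, int noflush_suspending,
				     int suspending)
{
	if (!log_failed)
		return MARK_NOSYNC;	/* mark the region out-of-sync, carry on */
	if (noflush_suspending)
		return REQUEUE_TO_CORE;	/* DM_ENDIO_REQUEUE: core reissues after reload */
	if (suspending)
		return ERROR_EIO;	/* no chance to retry: error the bio */
	return HOLD_FOR_RETRY;		/* park on ms->failures and wake the worker */
}

int main(void)
{
	printf("%d %d %d %d\n",
	       classify_failure(0, 0, 0),	/* MARK_NOSYNC */
	       classify_failure(1, 1, 0),	/* REQUEUE_TO_CORE */
	       classify_failure(1, 0, 1),	/* ERROR_EIO */
	       classify_failure(1, 0, 0));	/* HOLD_FOR_RETRY */
	return 0;
}

The HOLD_FOR_RETRY branch is what the do_writes() hunk above feeds: unissued writes merged onto ms->failures wait there until the log is repaired or a suspend forces one of the other outcomes.
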
@@ -1176,6 +1227,8 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
 	ms->nr_mirrors = nr_mirrors;
 	ms->nr_regions = dm_sector_div_up(ti->len, region_size);
 	ms->in_sync = 0;
+	ms->log_failure = 0;
+	atomic_set(&ms->suspend, 0);
 	atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
 
 	ms->io_client = dm_io_client_create(DM_IO_PAGES);
@@ -1511,26 +1564,51 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
 	return 0;
 }
 
-static void mirror_postsuspend(struct dm_target *ti)
+static void mirror_presuspend(struct dm_target *ti)
 {
 	struct mirror_set *ms = (struct mirror_set *) ti->private;
 	struct dirty_log *log = ms->rh.log;
 
+	atomic_set(&ms->suspend, 1);
+
+	/*
+	 * We must finish up all the work that we've
+	 * generated (i.e. recovery work).
+	 */
 	rh_stop_recovery(&ms->rh);
 
-	/* Wait for all I/O we generated to complete */
 	wait_event(_kmirrord_recovery_stopped,
 		   !atomic_read(&ms->rh.recovery_in_flight));
 
+	if (log->type->presuspend && log->type->presuspend(log))
+		/* FIXME: need better error handling */
+		DMWARN("log presuspend failed");
+
+	/*
+	 * Now that recovery is complete/stopped and the
+	 * delayed bios are queued, we need to wait for
+	 * the worker thread to complete. This way,
+	 * we know that all of our I/O has been pushed.
+	 */
+	flush_workqueue(ms->kmirrord_wq);
+}
+
+static void mirror_postsuspend(struct dm_target *ti)
+{
+	struct mirror_set *ms = ti->private;
+	struct dirty_log *log = ms->rh.log;
+
 	if (log->type->postsuspend && log->type->postsuspend(log))
 		/* FIXME: need better error handling */
-		DMWARN("log suspend failed");
+		DMWARN("log postsuspend failed");
 }
 
 static void mirror_resume(struct dm_target *ti)
 {
-	struct mirror_set *ms = (struct mirror_set *) ti->private;
+	struct mirror_set *ms = ti->private;
 	struct dirty_log *log = ms->rh.log;
+
+	atomic_set(&ms->suspend, 0);
 	if (log->type->resume && log->type->resume(log))
 		/* FIXME: need better error handling */
 		DMWARN("log resume failed");
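
The split into presuspend/postsuspend relies on the order in which the dm core drives the hooks, with ms->suspend held across the suspended window so do_failures() knows it must not park bios indefinitely. A toy driver of that sequence (illustrative only; in the kernel the core quiesces outstanding I/O between the two suspend hooks):

#include <stdio.h>

static int suspend_flag;	/* models atomic_t ms->suspend */

static void mirror_presuspend(void)
{
	suspend_flag = 1;	/* from here on, do_failures() may not park bios */
	puts("presuspend: stop recovery, wait for it, flush the worker");
}

static void mirror_postsuspend(void)
{
	puts("postsuspend: suspend the dirty log");
}

static void mirror_resume(void)
{
	suspend_flag = 0;
	puts("resume: resume the dirty log");
}

int main(void)
{
	mirror_presuspend();	/* runs while I/O may still be in flight */
	/* ...the dm core quiesces outstanding I/O here... */
	mirror_postsuspend();
	/* ...userspace may now reload the table, e.g. with a new log... */
	mirror_resume();
	return 0;
}
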
@@ -1564,7 +1642,7 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
 		DMEMIT("%d", ms->nr_mirrors);
 		for (m = 0; m < ms->nr_mirrors; m++)
 			DMEMIT(" %s %llu", ms->mirror[m].dev->name,
-			       (unsigned long long)ms->mirror[m].offset);
+				(unsigned long long)ms->mirror[m].offset);
 
 		if (ms->features & DM_RAID1_HANDLE_ERRORS)
 			DMEMIT(" 1 handle_errors");
@@ -1581,6 +1659,7 @@ static struct target_type mirror_target = {
 	.dtr = mirror_dtr,
 	.map = mirror_map,
 	.end_io = mirror_end_io,
+	.presuspend = mirror_presuspend,
 	.postsuspend = mirror_postsuspend,
 	.resume = mirror_resume,
 	.status = mirror_status,