Diffstat (limited to 'drivers')
-rw-r--r--	drivers/md/dm-raid1.c	101
1 file changed, 90 insertions, 11 deletions
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9978b9f07fe9..ec6d675bf766 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -146,6 +146,7 @@ struct mirror_set {
 	region_t nr_regions;
 	int in_sync;
 	int log_failure;
+	atomic_t suspend;
 
 	atomic_t default_mirror;	/* Default mirror */
 
@@ -372,6 +373,16 @@ static void complete_resync_work(struct region *reg, int success)
 	struct region_hash *rh = reg->rh;
 
 	rh->log->type->set_region_sync(rh->log, reg->key, success);
+
+	/*
+	 * Dispatch the bios before we call 'wake_up_all'.
+	 * This is important because if we are suspending,
+	 * we want to know that recovery is complete and
+	 * the work queue is flushed. If we wake_up_all
+	 * before we dispatch_bios (queue bios and call wake()),
+	 * then we risk suspending before the work queue
+	 * has been properly flushed.
+	 */
 	dispatch_bios(rh->ms, &reg->delayed_bios);
 	if (atomic_dec_and_test(&rh->recovery_in_flight))
 		wake_up_all(&_kmirrord_recovery_stopped);
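
The comment above encodes an ordering rule: queue the delayed bios on the worker before waking anyone sleeping on _kmirrord_recovery_stopped, or a suspender could flush the work queue before the bios land on it. A minimal userspace analogue of that rule, using pthreads in place of wait_event()/wake_up_all() (illustrative only, not kernel code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t recovery_stopped = PTHREAD_COND_INITIALIZER;
static int recovery_in_flight = 1;
static int queued_bios;	/* stand-in for work sitting on the worker's queue */

/* Recovery completion: make the deferred work visible, then wake. */
static void *complete_resync_work(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	queued_bios++;			/* dispatch_bios() */
	pthread_mutex_unlock(&lock);

	pthread_mutex_lock(&lock);
	recovery_in_flight--;		/* atomic_dec_and_test() */
	pthread_cond_broadcast(&recovery_stopped);	/* wake_up_all() */
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* Suspend path: wait for recovery, then "flush" the queue. */
static void *presuspend_waiter(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (recovery_in_flight)	/* wait_event() */
		pthread_cond_wait(&recovery_stopped, &lock);
	/*
	 * flush_workqueue() analogue: because the work was queued before
	 * the wakeup, this can never report 0. Reverse the two halves of
	 * complete_resync_work() and it can.
	 */
	printf("flush sees %d queued bio(s)\n", queued_bios);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t waiter, worker;

	pthread_create(&waiter, NULL, presuspend_waiter, NULL);
	pthread_create(&worker, NULL, complete_resync_work, NULL);
	pthread_join(worker, NULL);
	pthread_join(waiter, NULL);
	return 0;
}

Build with cc -pthread; the printf demonstrates the guarantee the kernel comment is defending.
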
@@ -1069,11 +1080,13 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
 	/*
 	 * Dispatch io.
 	 */
-	if (unlikely(ms->log_failure))
+	if (unlikely(ms->log_failure)) {
+		spin_lock_irq(&ms->lock);
+		bio_list_merge(&ms->failures, &sync);
+		spin_unlock_irq(&ms->lock);
+	} else
 		while ((bio = bio_list_pop(&sync)))
-			bio_endio(bio, -EIO);
-	else while ((bio = bio_list_pop(&sync)))
-		do_write(ms, bio);
+			do_write(ms, bio);
 
 	while ((bio = bio_list_pop(&recover)))
 		rh_delay(&ms->rh, bio);
@@ -1091,8 +1104,46 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
 	if (!failures->head)
 		return;
 
-	while ((bio = bio_list_pop(failures)))
-		__bio_mark_nosync(ms, bio, bio->bi_size, 0);
+	if (!ms->log_failure) {
+		while ((bio = bio_list_pop(failures)))
+			__bio_mark_nosync(ms, bio, bio->bi_size, 0);
+		return;
+	}
+
+	/*
+	 * If the log has failed, unattempted writes are being
+	 * put on the failures list. We can't issue those writes
+	 * until a log has been marked, so we must store them.
+	 *
+	 * If a 'noflush' suspend is in progress, we can requeue
+	 * the I/O's to the core. This gives userspace a chance
+	 * to reconfigure the mirror, at which point the core
+	 * will reissue the writes. If the 'noflush' flag is
+	 * not set, we have no choice but to return errors.
+	 *
+	 * Some writes on the failures list may have been
+	 * submitted before the log failure and represent a
+	 * failure to write to one of the devices. It is ok
+	 * for us to treat them the same and requeue them
+	 * as well.
+	 */
+	if (dm_noflush_suspending(ms->ti)) {
+		while ((bio = bio_list_pop(failures)))
+			bio_endio(bio, DM_ENDIO_REQUEUE);
+		return;
+	}
+
+	if (atomic_read(&ms->suspend)) {
+		while ((bio = bio_list_pop(failures)))
+			bio_endio(bio, -EIO);
+		return;
+	}
+
+	spin_lock_irq(&ms->lock);
+	bio_list_merge(&ms->failures, failures);
+	spin_unlock_irq(&ms->lock);
+
+	wake(ms);
 }
 
 static void trigger_event(struct work_struct *work)
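
Condensed, the new do_failures() is a four-way decision over each failed bio. A hypothetical userspace model of that ladder (the function and enum names here are invented for illustration; the three conditions map to ms->log_failure, dm_noflush_suspending() and ms->suspend):

#include <stdio.h>

enum outcome { MARK_NOSYNC, REQUEUE_TO_CORE, ERROR_EIO, HOLD_FOR_RETRY };

static enum outcome classify_failure(int log_failed, int noflush_suspending,
				     int suspending)
{
	if (!log_failed)
		return MARK_NOSYNC;	/* mark the region out-of-sync, carry on */
	if (noflush_suspending)
		return REQUEUE_TO_CORE;	/* DM_ENDIO_REQUEUE: core reissues after reload */
	if (suspending)
		return ERROR_EIO;	/* no chance to retry: error the bio */
	return HOLD_FOR_RETRY;		/* park on ms->failures and wake the worker */
}

int main(void)
{
	printf("%d %d %d %d\n",
	       classify_failure(0, 0, 0),	/* MARK_NOSYNC */
	       classify_failure(1, 1, 0),	/* REQUEUE_TO_CORE */
	       classify_failure(1, 0, 1),	/* ERROR_EIO */
	       classify_failure(1, 0, 0));	/* HOLD_FOR_RETRY */
	return 0;
}

The HOLD_FOR_RETRY branch is what the do_writes() hunk above feeds: unissued writes merged onto ms->failures wait there until the log is repaired or a suspend forces one of the other outcomes.
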
@@ -1176,6 +1227,8 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
 	ms->nr_mirrors = nr_mirrors;
 	ms->nr_regions = dm_sector_div_up(ti->len, region_size);
 	ms->in_sync = 0;
+	ms->log_failure = 0;
+	atomic_set(&ms->suspend, 0);
 	atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
 
 	ms->io_client = dm_io_client_create(DM_IO_PAGES);
@@ -1511,26 +1564,51 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
 	return 0;
 }
 
-static void mirror_postsuspend(struct dm_target *ti)
+static void mirror_presuspend(struct dm_target *ti)
 {
 	struct mirror_set *ms = (struct mirror_set *) ti->private;
 	struct dirty_log *log = ms->rh.log;
 
+	atomic_set(&ms->suspend, 1);
+
+	/*
+	 * We must finish up all the work that we've
+	 * generated (i.e. recovery work).
+	 */
 	rh_stop_recovery(&ms->rh);
 
-	/* Wait for all I/O we generated to complete */
 	wait_event(_kmirrord_recovery_stopped,
 		   !atomic_read(&ms->rh.recovery_in_flight));
 
+	if (log->type->presuspend && log->type->presuspend(log))
+		/* FIXME: need better error handling */
+		DMWARN("log presuspend failed");
+
+	/*
+	 * Now that recovery is complete/stopped and the
+	 * delayed bios are queued, we need to wait for
+	 * the worker thread to complete. This way,
+	 * we know that all of our I/O has been pushed.
+	 */
+	flush_workqueue(ms->kmirrord_wq);
+}
+
+static void mirror_postsuspend(struct dm_target *ti)
+{
+	struct mirror_set *ms = ti->private;
+	struct dirty_log *log = ms->rh.log;
+
 	if (log->type->postsuspend && log->type->postsuspend(log))
 		/* FIXME: need better error handling */
-		DMWARN("log suspend failed");
+		DMWARN("log postsuspend failed");
 }
 
 static void mirror_resume(struct dm_target *ti)
 {
-	struct mirror_set *ms = (struct mirror_set *) ti->private;
+	struct mirror_set *ms = ti->private;
 	struct dirty_log *log = ms->rh.log;
+
+	atomic_set(&ms->suspend, 0);
 	if (log->type->resume && log->type->resume(log))
 		/* FIXME: need better error handling */
 		DMWARN("log resume failed");
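
The split into presuspend/postsuspend relies on the order in which the dm core drives the hooks, with ms->suspend held across the suspended window so do_failures() knows it must not park bios indefinitely. A toy driver of that sequence (illustrative only; in the kernel the core quiesces outstanding I/O between the two suspend hooks):

#include <stdio.h>

static int suspend_flag;	/* models atomic_t ms->suspend */

static void mirror_presuspend(void)
{
	suspend_flag = 1;	/* from here on, do_failures() may not park bios */
	puts("presuspend: stop recovery, wait for it, flush the worker");
}

static void mirror_postsuspend(void)
{
	puts("postsuspend: suspend the dirty log");
}

static void mirror_resume(void)
{
	suspend_flag = 0;
	puts("resume: resume the dirty log");
}

int main(void)
{
	mirror_presuspend();	/* runs while I/O may still be in flight */
	/* ...the dm core quiesces outstanding I/O here... */
	mirror_postsuspend();
	/* ...userspace may now reload the table, e.g. with a new log... */
	mirror_resume();
	return 0;
}
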
@@ -1564,7 +1642,7 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
 		DMEMIT("%d", ms->nr_mirrors);
 		for (m = 0; m < ms->nr_mirrors; m++)
 			DMEMIT(" %s %llu", ms->mirror[m].dev->name,
-			       (unsigned long long)ms->mirror[m].offset);
+				(unsigned long long)ms->mirror[m].offset);
 
 		if (ms->features & DM_RAID1_HANDLE_ERRORS)
 			DMEMIT(" 1 handle_errors");
@@ -1581,6 +1659,7 @@ static struct target_type mirror_target = {
 	.dtr = mirror_dtr,
 	.map = mirror_map,
 	.end_io = mirror_end_io,
+	.presuspend = mirror_presuspend,
 	.postsuspend = mirror_postsuspend,
 	.resume = mirror_resume,
 	.status = mirror_status,