1 files changed, 33 insertions, 184 deletions
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index efd6169acf2f..d299fe9e78c8 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -49,11 +49,6 @@
 #include "drbd_vli.h"
-struct flush_work {
-        struct drbd_work w;
-        struct drbd_epoch *epoch;
-};
 enum finish_epoch {
        FE_STILL_LIVE,
        FE_DESTROYED,
@@ -66,16 +61,6 @@ static int drbd_do_auth(struct drbd_conf *mdev);
 static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
 static int e_end_block(struct drbd_conf *, struct drbd_work *, int);
-static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch)
-{
-        struct drbd_epoch *prev;
-        spin_lock(&mdev->epoch_lock);
-        prev = list_entry(epoch->list.prev, struct drbd_epoch, list);
-        if (prev == epoch || prev == mdev->current_epoch)
-                prev = NULL;
-        spin_unlock(&mdev->epoch_lock);
-        return prev;
-}
 #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
@@ -981,7 +966,7 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi
        return TRUE;
 }
-static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch)
+static void drbd_flush(struct drbd_conf *mdev)
 {
        int rv;
@@ -997,24 +982,6 @@ static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d
                }
                put_ldev(mdev);
        }
-        return drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
-}
-static int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
-        struct flush_work *fw = (struct flush_work *)w;
-        struct drbd_epoch *epoch = fw->epoch;
-        kfree(w);
-        if (!test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags))
-                drbd_flush_after_epoch(mdev, epoch);
-        drbd_may_finish_epoch(mdev, epoch, EV_PUT |
-                              (mdev->state.conn < C_CONNECTED ? EV_CLEANUP : 0));
-        return 1;
 }
 /**
@@ -1027,15 +994,13 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
                                               struct drbd_epoch *epoch,
                                               enum epoch_event ev)
 {
-        int finish, epoch_size;
+        int epoch_size;
        struct drbd_epoch *next_epoch;
-        int schedule_flush = 0;
        enum finish_epoch rv = FE_STILL_LIVE;
        spin_lock(&mdev->epoch_lock);
        do {
                next_epoch = NULL;
-                finish = 0;
                epoch_size = atomic_read(&epoch->epoch_size);
@@ -1045,16 +1010,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
                        break;
                case EV_GOT_BARRIER_NR:
                        set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
-                        /* Special case: If we just switched from WO_bio_barrier to
-                           WO_bdev_flush we should not finish the current epoch */
-                        if (test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) && epoch_size == 1 &&
-                            mdev->write_ordering != WO_bio_barrier &&
-                            epoch == mdev->current_epoch)
-                                clear_bit(DE_CONTAINS_A_BARRIER, &epoch->flags);
-                        break;
-                case EV_BARRIER_DONE:
-                        set_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags);
                        break;
                case EV_BECAME_LAST:
                        /* nothing to do*/
@@ -1063,23 +1018,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
                if (epoch_size != 0 &&
                    atomic_read(&epoch->active) == 0 &&
-                    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) &&
+                    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
-                    epoch->list.prev == &mdev->current_epoch->list &&
-                    !test_bit(DE_IS_FINISHING, &epoch->flags)) {
-                        /* Nearly all conditions are met to finish that epoch... */
-                        if (test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) ||
-                            mdev->write_ordering == WO_none ||
-                            (epoch_size == 1 && test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) ||
-                            ev & EV_CLEANUP) {
-                                finish = 1;
-                                set_bit(DE_IS_FINISHING, &epoch->flags);
-                        } else if (!test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) &&
-                                 mdev->write_ordering == WO_bio_barrier) {
-                                atomic_inc(&epoch->active);
-                                schedule_flush = 1;
-                        }
-                }
-                if (finish) {
                        if (!(ev & EV_CLEANUP)) {
                                spin_unlock(&mdev->epoch_lock);
                                drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
@@ -1102,6 +1041,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
                                /* atomic_set(&epoch->active, 0); is already zero */
                                if (rv == FE_STILL_LIVE)
                                        rv = FE_RECYCLED;
+                                wake_up(&mdev->ee_wait);
                        }
                }
@@ -1113,22 +1053,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
        spin_unlock(&mdev->epoch_lock);
-        if (schedule_flush) {
-                struct flush_work *fw;
-                fw = kmalloc(sizeof(*fw), GFP_ATOMIC);
-                if (fw) {
-                        fw->w.cb = w_flush;
-                        fw->epoch = epoch;
-                        drbd_queue_work(&mdev->data.work, &fw->w);
-                } else {
-                        dev_warn(DEV, "Could not kmalloc a flush_work obj\n");
-                        set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
-                        /* That is not a recursion, only one level */
-                        drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
-                        drbd_may_finish_epoch(mdev, epoch, EV_PUT);
-                }
-        }
        return rv;
 }
@@ -1144,19 +1068,16 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo)
                [WO_none] = "none",
                [WO_drain_io] = "drain",
                [WO_bdev_flush] = "flush",
-                [WO_bio_barrier] = "barrier",
        };
        pwo = mdev->write_ordering;
        wo = min(pwo, wo);
-        if (wo == WO_bio_barrier && mdev->ldev->dc.no_disk_barrier)
-                wo = WO_bdev_flush;
        if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
                wo = WO_drain_io;
        if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
                wo = WO_none;
        mdev->write_ordering = wo;
-        if (pwo != mdev->write_ordering || wo == WO_bio_barrier)
+        if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
                dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
 }
@@ -1192,7 +1113,7 @@ next_bio:
        bio->bi_sector = sector;
        bio->bi_bdev = mdev->ldev->backing_bdev;
        /* we special case some flags in the multi-bio case, see below
-         * (REQ_UNPLUG, REQ_HARDBARRIER) */
+         * (REQ_UNPLUG) */
        bio->bi_rw = rw;
        bio->bi_private = e;
        bio->bi_end_io = drbd_endio_sec;
@@ -1226,11 +1147,6 @@ next_bio:
                        bio->bi_rw &= ~REQ_UNPLUG;
                drbd_generic_make_request(mdev, fault_type, bio);
-                /* strip off REQ_HARDBARRIER,
-                 * unless it is the first or last bio */
-                if (bios && bios->bi_next)
-                        bios->bi_rw &= ~REQ_HARDBARRIER;
        } while (bios);
        maybe_kick_lo(mdev);
        return 0;
@@ -1244,45 +1160,9 @@ fail:
        return -ENOMEM;
 }
-/**
- * w_e_reissue() - Worker callback; Resubmit a bio, without REQ_HARDBARRIER set
- * @mdev:       DRBD device.
- * @w:          work object.
- * @cancel:     The connection will be closed anyways (unused in this callback)
- */
-int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local)
-{
-        struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
-        /* We leave DE_CONTAINS_A_BARRIER and EE_IS_BARRIER in place,
-           (and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch)
-           so that we can finish that epoch in drbd_may_finish_epoch().
-           That is necessary if we already have a long chain of Epochs, before
-           we realize that REQ_HARDBARRIER is actually not supported */
-        /* As long as the -ENOTSUPP on the barrier is reported immediately
-           that will never trigger. If it is reported late, we will just
-           print that warning and continue correctly for all future requests
-           with WO_bdev_flush */
-        if (previous_epoch(mdev, e->epoch))
-                dev_warn(DEV, "Write ordering was not enforced (one time event)\n");
-        /* we still have a local reference,
-         * get_ldev was done in receive_Data. */
-        e->w.cb = e_end_block;
-        if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_DT_WR) != 0) {
-                /* drbd_submit_ee fails for one reason only:
-                 * if was not able to allocate sufficient bios.
-                 * requeue, try again later. */
-                e->w.cb = w_e_reissue;
-                drbd_queue_work(&mdev->data.work, &e->w);
-        }
-        return 1;
-}
 static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
-        int rv, issue_flush;
+        int rv;
        struct p_barrier *p = &mdev->data.rbuf.barrier;
        struct drbd_epoch *epoch;
@@ -1300,44 +1180,40 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign
         * Therefore we must send the barrier_ack after the barrier request was
         * completed. */
        switch (mdev->write_ordering) {
-        case WO_bio_barrier:
        case WO_none:
                if (rv == FE_RECYCLED)
                        return TRUE;
-                break;
+                /* receiver context, in the writeout path of the other node.
+                 * avoid potential distributed deadlock */
+                epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
+                if (epoch)
+                        break;
+                else
+                        dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
+                        /* Fall through */
        case WO_bdev_flush:
        case WO_drain_io:
-                if (rv == FE_STILL_LIVE) {
-                        set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags);
-                        drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
-                        rv = drbd_flush_after_epoch(mdev, mdev->current_epoch);
-                }
-                if (rv == FE_RECYCLED)
-                        return TRUE;
-                /* The asender will send all the ACKs and barrier ACKs out, since
-                   all EEs moved from the active_ee to the done_ee. We need to
-                   provide a new epoch object for the EEs that come in soon */
-                break;
-        }
-        /* receiver context, in the writeout path of the other node.
-         * avoid potential distributed deadlock */
-        epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
-        if (!epoch) {
-                dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
-                issue_flush = !test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags);
                drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
-                if (issue_flush) {
+                drbd_flush(mdev);
-                        rv = drbd_flush_after_epoch(mdev, mdev->current_epoch);
-                        if (rv == FE_RECYCLED)
+                if (atomic_read(&mdev->current_epoch->epoch_size)) {
-                                return TRUE;
+                        epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
+                        if (epoch)
+                                break;
                }
-                drbd_wait_ee_list_empty(mdev, &mdev->done_ee);
+                epoch = mdev->current_epoch;
+                wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
+                D_ASSERT(atomic_read(&epoch->active) == 0);
+                D_ASSERT(epoch->flags == 0);
                return TRUE;
+        default:
+                dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
+                return FALSE;
        }
        epoch->flags = 0;
@@ -1652,15 +1528,8 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
        struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
        sector_t sector = e->sector;
-        struct drbd_epoch *epoch;
        int ok = 1, pcmd;
-        if (e->flags & EE_IS_BARRIER) {
-                epoch = previous_epoch(mdev, e->epoch);
-                if (epoch)
-                        drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE + (cancel ? EV_CLEANUP : 0));
-        }
        if (mdev->net_conf->wire_protocol == DRBD_PROT_C) {
                if (likely((e->flags & EE_WAS_ERROR) == 0)) {
                        pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
@@ -1817,27 +1686,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
        e->epoch = mdev->current_epoch;
        atomic_inc(&e->epoch->epoch_size);
        atomic_inc(&e->epoch->active);
-        if (mdev->write_ordering == WO_bio_barrier && atomic_read(&e->epoch->epoch_size) == 1) {
-                struct drbd_epoch *epoch;
-                /* Issue a barrier if we start a new epoch, and the previous epoch
-                   was not a epoch containing a single request which already was
-                   a Barrier. */
-                epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list);
-                if (epoch == e->epoch) {
-                        set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
-                        rw |= REQ_HARDBARRIER;
-                        e->flags |= EE_IS_BARRIER;
-                } else {
-                        if (atomic_read(&epoch->epoch_size) > 1 ||
-                            !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) {
-                                set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
-                                set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
-                                rw |= REQ_HARDBARRIER;
-                                e->flags |= EE_IS_BARRIER;
-                        }
-                }
-        }
        spin_unlock(&mdev->epoch_lock);
        dp_flags = be32_to_cpu(p->dp_flags);
@@ -1995,10 +1843,11 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
                break;
        }
-        if (mdev->state.pdsk == D_DISKLESS) {
+        if (mdev->state.pdsk < D_INCONSISTENT) {
                /* In case we have the only disk of the cluster, */
                drbd_set_out_of_sync(mdev, e->sector, e->size);
                e->flags |= EE_CALL_AL_COMPLETE_IO;
+                e->flags &= ~EE_MAY_SET_IN_SYNC;
                drbd_al_begin_io(mdev, e->sector);
        }
@@ -3362,7 +3211,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
                if (ns.conn == C_MASK) {
                        ns.conn = C_CONNECTED;
                        if (mdev->state.disk == D_NEGOTIATING) {
-                                drbd_force_state(mdev, NS(disk, D_DISKLESS));
+                                drbd_force_state(mdev, NS(disk, D_FAILED));
                        } else if (peer_state.disk == D_NEGOTIATING) {
                                dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
                                peer_state.disk = D_DISKLESS;

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index efd6169acf2f..d299fe9e78c8 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -49,11 +49,6 @@
49		49
50	#include "drbd_vli.h"	50	#include "drbd_vli.h"
51		51
52	struct flush_work {
53	struct drbd_work w;
54	struct drbd_epoch *epoch;
55	};
56
57	enum finish_epoch {	52	enum finish_epoch {
58	FE_STILL_LIVE,	53	FE_STILL_LIVE,
59	FE_DESTROYED,	54	FE_DESTROYED,
@@ -66,16 +61,6 @@ static int drbd_do_auth(struct drbd_conf *mdev);
66	static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);	61	static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
67	static int e_end_block(struct drbd_conf *, struct drbd_work *, int);	62	static int e_end_block(struct drbd_conf *, struct drbd_work *, int);
68		63
69	static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch)
70	{
71	struct drbd_epoch *prev;
72	spin_lock(&mdev->epoch_lock);
73	prev = list_entry(epoch->list.prev, struct drbd_epoch, list);
74	if (prev == epoch \|\| prev == mdev->current_epoch)
75	prev = NULL;
76	spin_unlock(&mdev->epoch_lock);
77	return prev;
78	}
79		64
80	#define GFP_TRY (__GFP_HIGHMEM \| __GFP_NOWARN)	65	#define GFP_TRY (__GFP_HIGHMEM \| __GFP_NOWARN)
81		66
@@ -981,7 +966,7 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi
981	return TRUE;	966	return TRUE;
982	}	967	}
983		968
984	static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch)	969	static void drbd_flush(struct drbd_conf *mdev)
985	{	970	{
986	int rv;	971	int rv;
987		972
@@ -997,24 +982,6 @@ static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d
997	}	982	}
998	put_ldev(mdev);	983	put_ldev(mdev);
999	}	984	}
1000
1001	return drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
1002	}
1003
1004	static int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1005	{
1006	struct flush_work *fw = (struct flush_work *)w;
1007	struct drbd_epoch *epoch = fw->epoch;
1008
1009	kfree(w);
1010
1011	if (!test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags))
1012	drbd_flush_after_epoch(mdev, epoch);
1013
1014	drbd_may_finish_epoch(mdev, epoch, EV_PUT \|
1015	(mdev->state.conn < C_CONNECTED ? EV_CLEANUP : 0));
1016
1017	return 1;
1018	}	985	}
1019		986
1020	/**	987	/**
@@ -1027,15 +994,13 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1027	struct drbd_epoch *epoch,	994	struct drbd_epoch *epoch,
1028	enum epoch_event ev)	995	enum epoch_event ev)
1029	{	996	{
1030	int finish, epoch_size;	997	int epoch_size;
1031	struct drbd_epoch *next_epoch;	998	struct drbd_epoch *next_epoch;
1032	int schedule_flush = 0;
1033	enum finish_epoch rv = FE_STILL_LIVE;	999	enum finish_epoch rv = FE_STILL_LIVE;
1034		1000
1035	spin_lock(&mdev->epoch_lock);	1001	spin_lock(&mdev->epoch_lock);
1036	do {	1002	do {
1037	next_epoch = NULL;	1003	next_epoch = NULL;
1038	finish = 0;
1039		1004
1040	epoch_size = atomic_read(&epoch->epoch_size);	1005	epoch_size = atomic_read(&epoch->epoch_size);
1041		1006
@@ -1045,16 +1010,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1045	break;	1010	break;
1046	case EV_GOT_BARRIER_NR:	1011	case EV_GOT_BARRIER_NR:
1047	set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);	1012	set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
1048
1049	/* Special case: If we just switched from WO_bio_barrier to
1050	WO_bdev_flush we should not finish the current epoch */
1051	if (test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) && epoch_size == 1 &&
1052	mdev->write_ordering != WO_bio_barrier &&
1053	epoch == mdev->current_epoch)
1054	clear_bit(DE_CONTAINS_A_BARRIER, &epoch->flags);
1055	break;
1056	case EV_BARRIER_DONE:
1057	set_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags);
1058	break;	1013	break;
1059	case EV_BECAME_LAST:	1014	case EV_BECAME_LAST:
1060	/* nothing to do*/	1015	/* nothing to do*/
@@ -1063,23 +1018,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1063		1018
1064	if (epoch_size != 0 &&	1019	if (epoch_size != 0 &&
1065	atomic_read(&epoch->active) == 0 &&	1020	atomic_read(&epoch->active) == 0 &&
1066	test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) &&	1021	test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
1067	epoch->list.prev == &mdev->current_epoch->list &&
1068	!test_bit(DE_IS_FINISHING, &epoch->flags)) {
1069	/* Nearly all conditions are met to finish that epoch... */
1070	if (test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) \|\|
1071	mdev->write_ordering == WO_none \|\|
1072	(epoch_size == 1 && test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) \|\|
1073	ev & EV_CLEANUP) {
1074	finish = 1;
1075	set_bit(DE_IS_FINISHING, &epoch->flags);
1076	} else if (!test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) &&
1077	mdev->write_ordering == WO_bio_barrier) {
1078	atomic_inc(&epoch->active);
1079	schedule_flush = 1;
1080	}
1081	}
1082	if (finish) {
1083	if (!(ev & EV_CLEANUP)) {	1022	if (!(ev & EV_CLEANUP)) {
1084	spin_unlock(&mdev->epoch_lock);	1023	spin_unlock(&mdev->epoch_lock);
1085	drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);	1024	drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
@@ -1102,6 +1041,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1102	/* atomic_set(&epoch->active, 0); is already zero */	1041	/* atomic_set(&epoch->active, 0); is already zero */
1103	if (rv == FE_STILL_LIVE)	1042	if (rv == FE_STILL_LIVE)
1104	rv = FE_RECYCLED;	1043	rv = FE_RECYCLED;
		1044	wake_up(&mdev->ee_wait);
1105	}	1045	}
1106	}	1046	}
1107		1047
@@ -1113,22 +1053,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1113		1053
1114	spin_unlock(&mdev->epoch_lock);	1054	spin_unlock(&mdev->epoch_lock);
1115		1055
1116	if (schedule_flush) {
1117	struct flush_work *fw;
1118	fw = kmalloc(sizeof(*fw), GFP_ATOMIC);
1119	if (fw) {
1120	fw->w.cb = w_flush;
1121	fw->epoch = epoch;
1122	drbd_queue_work(&mdev->data.work, &fw->w);
1123	} else {
1124	dev_warn(DEV, "Could not kmalloc a flush_work obj\n");
1125	set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
1126	/* That is not a recursion, only one level */
1127	drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
1128	drbd_may_finish_epoch(mdev, epoch, EV_PUT);
1129	}
1130	}
1131
1132	return rv;	1056	return rv;
1133	}	1057	}
1134		1058
@@ -1144,19 +1068,16 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo)
1144	[WO_none] = "none",	1068	[WO_none] = "none",
1145	[WO_drain_io] = "drain",	1069	[WO_drain_io] = "drain",
1146	[WO_bdev_flush] = "flush",	1070	[WO_bdev_flush] = "flush",
1147	[WO_bio_barrier] = "barrier",
1148	};	1071	};
1149		1072
1150	pwo = mdev->write_ordering;	1073	pwo = mdev->write_ordering;
1151	wo = min(pwo, wo);	1074	wo = min(pwo, wo);
1152	if (wo == WO_bio_barrier && mdev->ldev->dc.no_disk_barrier)
1153	wo = WO_bdev_flush;
1154	if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)	1075	if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1155	wo = WO_drain_io;	1076	wo = WO_drain_io;
1156	if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)	1077	if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1157	wo = WO_none;	1078	wo = WO_none;
1158	mdev->write_ordering = wo;	1079	mdev->write_ordering = wo;
1159	if (pwo != mdev->write_ordering \|\| wo == WO_bio_barrier)	1080	if (pwo != mdev->write_ordering \|\| wo == WO_bdev_flush)
1160	dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);	1081	dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1161	}	1082	}
1162		1083
@@ -1192,7 +1113,7 @@ next_bio:
1192	bio->bi_sector = sector;	1113	bio->bi_sector = sector;
1193	bio->bi_bdev = mdev->ldev->backing_bdev;	1114	bio->bi_bdev = mdev->ldev->backing_bdev;
1194	/* we special case some flags in the multi-bio case, see below	1115	/* we special case some flags in the multi-bio case, see below
1195	* (REQ_UNPLUG, REQ_HARDBARRIER) */	1116	* (REQ_UNPLUG) */
1196	bio->bi_rw = rw;	1117	bio->bi_rw = rw;
1197	bio->bi_private = e;	1118	bio->bi_private = e;
1198	bio->bi_end_io = drbd_endio_sec;	1119	bio->bi_end_io = drbd_endio_sec;
@@ -1226,11 +1147,6 @@ next_bio:
1226	bio->bi_rw &= ~REQ_UNPLUG;	1147	bio->bi_rw &= ~REQ_UNPLUG;
1227		1148
1228	drbd_generic_make_request(mdev, fault_type, bio);	1149	drbd_generic_make_request(mdev, fault_type, bio);
1229
1230	/* strip off REQ_HARDBARRIER,
1231	* unless it is the first or last bio */
1232	if (bios && bios->bi_next)
1233	bios->bi_rw &= ~REQ_HARDBARRIER;
1234	} while (bios);	1150	} while (bios);
1235	maybe_kick_lo(mdev);	1151	maybe_kick_lo(mdev);
1236	return 0;	1152	return 0;
@@ -1244,45 +1160,9 @@ fail:
1244	return -ENOMEM;	1160	return -ENOMEM;
1245	}	1161	}
1246		1162
1247	/**
1248	* w_e_reissue() - Worker callback; Resubmit a bio, without REQ_HARDBARRIER set
1249	* @mdev: DRBD device.
1250	* @w: work object.
1251	* @cancel: The connection will be closed anyways (unused in this callback)
1252	*/
1253	int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local)
1254	{
1255	struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
1256	/* We leave DE_CONTAINS_A_BARRIER and EE_IS_BARRIER in place,
1257	(and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch)
1258	so that we can finish that epoch in drbd_may_finish_epoch().
1259	That is necessary if we already have a long chain of Epochs, before
1260	we realize that REQ_HARDBARRIER is actually not supported */
1261
1262	/* As long as the -ENOTSUPP on the barrier is reported immediately
1263	that will never trigger. If it is reported late, we will just
1264	print that warning and continue correctly for all future requests
1265	with WO_bdev_flush */
1266	if (previous_epoch(mdev, e->epoch))
1267	dev_warn(DEV, "Write ordering was not enforced (one time event)\n");
1268
1269	/* we still have a local reference,
1270	* get_ldev was done in receive_Data. */
1271
1272	e->w.cb = e_end_block;
1273	if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_DT_WR) != 0) {
1274	/* drbd_submit_ee fails for one reason only:
1275	* if was not able to allocate sufficient bios.
1276	* requeue, try again later. */
1277	e->w.cb = w_e_reissue;
1278	drbd_queue_work(&mdev->data.work, &e->w);
1279	}
1280	return 1;
1281	}
1282
1283	static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)	1163	static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
1284	{	1164	{
1285	int rv, issue_flush;	1165	int rv;
1286	struct p_barrier *p = &mdev->data.rbuf.barrier;	1166	struct p_barrier *p = &mdev->data.rbuf.barrier;
1287	struct drbd_epoch *epoch;	1167	struct drbd_epoch *epoch;
1288		1168
@@ -1300,44 +1180,40 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign
1300	* Therefore we must send the barrier_ack after the barrier request was	1180	* Therefore we must send the barrier_ack after the barrier request was
1301	* completed. */	1181	* completed. */
1302	switch (mdev->write_ordering) {	1182	switch (mdev->write_ordering) {
1303	case WO_bio_barrier:
1304	case WO_none:	1183	case WO_none:
1305	if (rv == FE_RECYCLED)	1184	if (rv == FE_RECYCLED)
1306	return TRUE;	1185	return TRUE;
1307	break;	1186
		1187	/* receiver context, in the writeout path of the other node.
		1188	* avoid potential distributed deadlock */
		1189	epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		1190	if (epoch)
		1191	break;
		1192	else
		1193	dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
		1194	/* Fall through */
1308		1195
1309	case WO_bdev_flush:	1196	case WO_bdev_flush:
1310	case WO_drain_io:	1197	case WO_drain_io:
1311	if (rv == FE_STILL_LIVE) {
1312	set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags);
1313	drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
1314	rv = drbd_flush_after_epoch(mdev, mdev->current_epoch);
1315	}
1316	if (rv == FE_RECYCLED)
1317	return TRUE;
1318
1319	/* The asender will send all the ACKs and barrier ACKs out, since
1320	all EEs moved from the active_ee to the done_ee. We need to
1321	provide a new epoch object for the EEs that come in soon */
1322	break;
1323	}
1324
1325	/* receiver context, in the writeout path of the other node.
1326	* avoid potential distributed deadlock */
1327	epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1328	if (!epoch) {
1329	dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
1330	issue_flush = !test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags);
1331	drbd_wait_ee_list_empty(mdev, &mdev->active_ee);	1198	drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
1332	if (issue_flush) {	1199	drbd_flush(mdev);
1333	rv = drbd_flush_after_epoch(mdev, mdev->current_epoch);	1200
1334	if (rv == FE_RECYCLED)	1201	if (atomic_read(&mdev->current_epoch->epoch_size)) {
1335	return TRUE;	1202	epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		1203	if (epoch)
		1204	break;
1336	}	1205	}
1337		1206
1338	drbd_wait_ee_list_empty(mdev, &mdev->done_ee);	1207	epoch = mdev->current_epoch;
		1208	wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
		1209
		1210	D_ASSERT(atomic_read(&epoch->active) == 0);
		1211	D_ASSERT(epoch->flags == 0);
1339		1212
1340	return TRUE;	1213	return TRUE;
		1214	default:
		1215	dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
		1216	return FALSE;
1341	}	1217	}
1342		1218
1343	epoch->flags = 0;	1219	epoch->flags = 0;
@@ -1652,15 +1528,8 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1652	{	1528	{
1653	struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;	1529	struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
1654	sector_t sector = e->sector;	1530	sector_t sector = e->sector;
1655	struct drbd_epoch *epoch;
1656	int ok = 1, pcmd;	1531	int ok = 1, pcmd;
1657		1532
1658	if (e->flags & EE_IS_BARRIER) {
1659	epoch = previous_epoch(mdev, e->epoch);
1660	if (epoch)
1661	drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE + (cancel ? EV_CLEANUP : 0));
1662	}
1663
1664	if (mdev->net_conf->wire_protocol == DRBD_PROT_C) {	1533	if (mdev->net_conf->wire_protocol == DRBD_PROT_C) {
1665	if (likely((e->flags & EE_WAS_ERROR) == 0)) {	1534	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
1666	pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&	1535	pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
@@ -1817,27 +1686,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
1817	e->epoch = mdev->current_epoch;	1686	e->epoch = mdev->current_epoch;
1818	atomic_inc(&e->epoch->epoch_size);	1687	atomic_inc(&e->epoch->epoch_size);
1819	atomic_inc(&e->epoch->active);	1688	atomic_inc(&e->epoch->active);
1820
1821	if (mdev->write_ordering == WO_bio_barrier && atomic_read(&e->epoch->epoch_size) == 1) {
1822	struct drbd_epoch *epoch;
1823	/* Issue a barrier if we start a new epoch, and the previous epoch
1824	was not a epoch containing a single request which already was
1825	a Barrier. */
1826	epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list);
1827	if (epoch == e->epoch) {
1828	set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
1829	rw \|= REQ_HARDBARRIER;
1830	e->flags \|= EE_IS_BARRIER;
1831	} else {
1832	if (atomic_read(&epoch->epoch_size) > 1 \|\|
1833	!test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) {
1834	set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
1835	set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
1836	rw \|= REQ_HARDBARRIER;
1837	e->flags \|= EE_IS_BARRIER;
1838	}
1839	}
1840	}
1841	spin_unlock(&mdev->epoch_lock);	1689	spin_unlock(&mdev->epoch_lock);
1842		1690
1843	dp_flags = be32_to_cpu(p->dp_flags);	1691	dp_flags = be32_to_cpu(p->dp_flags);
@@ -1995,10 +1843,11 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
1995	break;	1843	break;
1996	}	1844	}
1997		1845
1998	if (mdev->state.pdsk == D_DISKLESS) {	1846	if (mdev->state.pdsk < D_INCONSISTENT) {
1999	/* In case we have the only disk of the cluster, */	1847	/* In case we have the only disk of the cluster, */
2000	drbd_set_out_of_sync(mdev, e->sector, e->size);	1848	drbd_set_out_of_sync(mdev, e->sector, e->size);
2001	e->flags \|= EE_CALL_AL_COMPLETE_IO;	1849	e->flags \|= EE_CALL_AL_COMPLETE_IO;
		1850	e->flags &= ~EE_MAY_SET_IN_SYNC;
2002	drbd_al_begin_io(mdev, e->sector);	1851	drbd_al_begin_io(mdev, e->sector);
2003	}	1852	}
2004		1853
@@ -3362,7 +3211,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3362	if (ns.conn == C_MASK) {	3211	if (ns.conn == C_MASK) {
3363	ns.conn = C_CONNECTED;	3212	ns.conn = C_CONNECTED;
3364	if (mdev->state.disk == D_NEGOTIATING) {	3213	if (mdev->state.disk == D_NEGOTIATING) {
3365	drbd_force_state(mdev, NS(disk, D_DISKLESS));	3214	drbd_force_state(mdev, NS(disk, D_FAILED));
3366	} else if (peer_state.disk == D_NEGOTIATING) {	3215	} else if (peer_state.disk == D_NEGOTIATING) {
3367	dev_err(DEV, "Disk attach process on the peer node was aborted.\n");	3216	dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3368	peer_state.disk = D_DISKLESS;	3217	peer_state.disk = D_DISKLESS;