aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd/drbd_receiver.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/drbd/drbd_receiver.c')
-rw-r--r--drivers/block/drbd/drbd_receiver.c217
1 files changed, 33 insertions, 184 deletions
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index efd6169acf2f..d299fe9e78c8 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -49,11 +49,6 @@
49 49
50#include "drbd_vli.h" 50#include "drbd_vli.h"
51 51
52struct flush_work {
53 struct drbd_work w;
54 struct drbd_epoch *epoch;
55};
56
57enum finish_epoch { 52enum finish_epoch {
58 FE_STILL_LIVE, 53 FE_STILL_LIVE,
59 FE_DESTROYED, 54 FE_DESTROYED,
@@ -66,16 +61,6 @@ static int drbd_do_auth(struct drbd_conf *mdev);
66static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); 61static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
67static int e_end_block(struct drbd_conf *, struct drbd_work *, int); 62static int e_end_block(struct drbd_conf *, struct drbd_work *, int);
68 63
69static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch)
70{
71 struct drbd_epoch *prev;
72 spin_lock(&mdev->epoch_lock);
73 prev = list_entry(epoch->list.prev, struct drbd_epoch, list);
74 if (prev == epoch || prev == mdev->current_epoch)
75 prev = NULL;
76 spin_unlock(&mdev->epoch_lock);
77 return prev;
78}
79 64
80#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) 65#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
81 66
@@ -981,7 +966,7 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi
981 return TRUE; 966 return TRUE;
982} 967}
983 968
984static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch) 969static void drbd_flush(struct drbd_conf *mdev)
985{ 970{
986 int rv; 971 int rv;
987 972
@@ -997,24 +982,6 @@ static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d
997 } 982 }
998 put_ldev(mdev); 983 put_ldev(mdev);
999 } 984 }
1000
1001 return drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
1002}
1003
1004static int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1005{
1006 struct flush_work *fw = (struct flush_work *)w;
1007 struct drbd_epoch *epoch = fw->epoch;
1008
1009 kfree(w);
1010
1011 if (!test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags))
1012 drbd_flush_after_epoch(mdev, epoch);
1013
1014 drbd_may_finish_epoch(mdev, epoch, EV_PUT |
1015 (mdev->state.conn < C_CONNECTED ? EV_CLEANUP : 0));
1016
1017 return 1;
1018} 985}
1019 986
1020/** 987/**
@@ -1027,15 +994,13 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1027 struct drbd_epoch *epoch, 994 struct drbd_epoch *epoch,
1028 enum epoch_event ev) 995 enum epoch_event ev)
1029{ 996{
1030 int finish, epoch_size; 997 int epoch_size;
1031 struct drbd_epoch *next_epoch; 998 struct drbd_epoch *next_epoch;
1032 int schedule_flush = 0;
1033 enum finish_epoch rv = FE_STILL_LIVE; 999 enum finish_epoch rv = FE_STILL_LIVE;
1034 1000
1035 spin_lock(&mdev->epoch_lock); 1001 spin_lock(&mdev->epoch_lock);
1036 do { 1002 do {
1037 next_epoch = NULL; 1003 next_epoch = NULL;
1038 finish = 0;
1039 1004
1040 epoch_size = atomic_read(&epoch->epoch_size); 1005 epoch_size = atomic_read(&epoch->epoch_size);
1041 1006
@@ -1045,16 +1010,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1045 break; 1010 break;
1046 case EV_GOT_BARRIER_NR: 1011 case EV_GOT_BARRIER_NR:
1047 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags); 1012 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
1048
1049 /* Special case: If we just switched from WO_bio_barrier to
1050 WO_bdev_flush we should not finish the current epoch */
1051 if (test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) && epoch_size == 1 &&
1052 mdev->write_ordering != WO_bio_barrier &&
1053 epoch == mdev->current_epoch)
1054 clear_bit(DE_CONTAINS_A_BARRIER, &epoch->flags);
1055 break;
1056 case EV_BARRIER_DONE:
1057 set_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags);
1058 break; 1013 break;
1059 case EV_BECAME_LAST: 1014 case EV_BECAME_LAST:
1060 /* nothing to do*/ 1015 /* nothing to do*/
@@ -1063,23 +1018,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1063 1018
1064 if (epoch_size != 0 && 1019 if (epoch_size != 0 &&
1065 atomic_read(&epoch->active) == 0 && 1020 atomic_read(&epoch->active) == 0 &&
1066 test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) && 1021 test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
1067 epoch->list.prev == &mdev->current_epoch->list &&
1068 !test_bit(DE_IS_FINISHING, &epoch->flags)) {
1069 /* Nearly all conditions are met to finish that epoch... */
1070 if (test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) ||
1071 mdev->write_ordering == WO_none ||
1072 (epoch_size == 1 && test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) ||
1073 ev & EV_CLEANUP) {
1074 finish = 1;
1075 set_bit(DE_IS_FINISHING, &epoch->flags);
1076 } else if (!test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) &&
1077 mdev->write_ordering == WO_bio_barrier) {
1078 atomic_inc(&epoch->active);
1079 schedule_flush = 1;
1080 }
1081 }
1082 if (finish) {
1083 if (!(ev & EV_CLEANUP)) { 1022 if (!(ev & EV_CLEANUP)) {
1084 spin_unlock(&mdev->epoch_lock); 1023 spin_unlock(&mdev->epoch_lock);
1085 drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size); 1024 drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
@@ -1102,6 +1041,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1102 /* atomic_set(&epoch->active, 0); is already zero */ 1041 /* atomic_set(&epoch->active, 0); is already zero */
1103 if (rv == FE_STILL_LIVE) 1042 if (rv == FE_STILL_LIVE)
1104 rv = FE_RECYCLED; 1043 rv = FE_RECYCLED;
1044 wake_up(&mdev->ee_wait);
1105 } 1045 }
1106 } 1046 }
1107 1047
@@ -1113,22 +1053,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1113 1053
1114 spin_unlock(&mdev->epoch_lock); 1054 spin_unlock(&mdev->epoch_lock);
1115 1055
1116 if (schedule_flush) {
1117 struct flush_work *fw;
1118 fw = kmalloc(sizeof(*fw), GFP_ATOMIC);
1119 if (fw) {
1120 fw->w.cb = w_flush;
1121 fw->epoch = epoch;
1122 drbd_queue_work(&mdev->data.work, &fw->w);
1123 } else {
1124 dev_warn(DEV, "Could not kmalloc a flush_work obj\n");
1125 set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
1126 /* That is not a recursion, only one level */
1127 drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
1128 drbd_may_finish_epoch(mdev, epoch, EV_PUT);
1129 }
1130 }
1131
1132 return rv; 1056 return rv;
1133} 1057}
1134 1058
@@ -1144,19 +1068,16 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo)
1144 [WO_none] = "none", 1068 [WO_none] = "none",
1145 [WO_drain_io] = "drain", 1069 [WO_drain_io] = "drain",
1146 [WO_bdev_flush] = "flush", 1070 [WO_bdev_flush] = "flush",
1147 [WO_bio_barrier] = "barrier",
1148 }; 1071 };
1149 1072
1150 pwo = mdev->write_ordering; 1073 pwo = mdev->write_ordering;
1151 wo = min(pwo, wo); 1074 wo = min(pwo, wo);
1152 if (wo == WO_bio_barrier && mdev->ldev->dc.no_disk_barrier)
1153 wo = WO_bdev_flush;
1154 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush) 1075 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1155 wo = WO_drain_io; 1076 wo = WO_drain_io;
1156 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain) 1077 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1157 wo = WO_none; 1078 wo = WO_none;
1158 mdev->write_ordering = wo; 1079 mdev->write_ordering = wo;
1159 if (pwo != mdev->write_ordering || wo == WO_bio_barrier) 1080 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
1160 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]); 1081 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1161} 1082}
1162 1083
@@ -1192,7 +1113,7 @@ next_bio:
1192 bio->bi_sector = sector; 1113 bio->bi_sector = sector;
1193 bio->bi_bdev = mdev->ldev->backing_bdev; 1114 bio->bi_bdev = mdev->ldev->backing_bdev;
1194 /* we special case some flags in the multi-bio case, see below 1115 /* we special case some flags in the multi-bio case, see below
1195 * (REQ_UNPLUG, REQ_HARDBARRIER) */ 1116 * (REQ_UNPLUG) */
1196 bio->bi_rw = rw; 1117 bio->bi_rw = rw;
1197 bio->bi_private = e; 1118 bio->bi_private = e;
1198 bio->bi_end_io = drbd_endio_sec; 1119 bio->bi_end_io = drbd_endio_sec;
@@ -1226,11 +1147,6 @@ next_bio:
1226 bio->bi_rw &= ~REQ_UNPLUG; 1147 bio->bi_rw &= ~REQ_UNPLUG;
1227 1148
1228 drbd_generic_make_request(mdev, fault_type, bio); 1149 drbd_generic_make_request(mdev, fault_type, bio);
1229
1230 /* strip off REQ_HARDBARRIER,
1231 * unless it is the first or last bio */
1232 if (bios && bios->bi_next)
1233 bios->bi_rw &= ~REQ_HARDBARRIER;
1234 } while (bios); 1150 } while (bios);
1235 maybe_kick_lo(mdev); 1151 maybe_kick_lo(mdev);
1236 return 0; 1152 return 0;
@@ -1244,45 +1160,9 @@ fail:
1244 return -ENOMEM; 1160 return -ENOMEM;
1245} 1161}
1246 1162
1247/**
1248 * w_e_reissue() - Worker callback; Resubmit a bio, without REQ_HARDBARRIER set
1249 * @mdev: DRBD device.
1250 * @w: work object.
1251 * @cancel: The connection will be closed anyways (unused in this callback)
1252 */
1253int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local)
1254{
1255 struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
1256 /* We leave DE_CONTAINS_A_BARRIER and EE_IS_BARRIER in place,
1257 (and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch)
1258 so that we can finish that epoch in drbd_may_finish_epoch().
1259 That is necessary if we already have a long chain of Epochs, before
1260 we realize that REQ_HARDBARRIER is actually not supported */
1261
1262 /* As long as the -ENOTSUPP on the barrier is reported immediately
1263 that will never trigger. If it is reported late, we will just
1264 print that warning and continue correctly for all future requests
1265 with WO_bdev_flush */
1266 if (previous_epoch(mdev, e->epoch))
1267 dev_warn(DEV, "Write ordering was not enforced (one time event)\n");
1268
1269 /* we still have a local reference,
1270 * get_ldev was done in receive_Data. */
1271
1272 e->w.cb = e_end_block;
1273 if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_DT_WR) != 0) {
1274 /* drbd_submit_ee fails for one reason only:
1275 * if was not able to allocate sufficient bios.
1276 * requeue, try again later. */
1277 e->w.cb = w_e_reissue;
1278 drbd_queue_work(&mdev->data.work, &e->w);
1279 }
1280 return 1;
1281}
1282
1283static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 1163static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
1284{ 1164{
1285 int rv, issue_flush; 1165 int rv;
1286 struct p_barrier *p = &mdev->data.rbuf.barrier; 1166 struct p_barrier *p = &mdev->data.rbuf.barrier;
1287 struct drbd_epoch *epoch; 1167 struct drbd_epoch *epoch;
1288 1168
@@ -1300,44 +1180,40 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign
1300 * Therefore we must send the barrier_ack after the barrier request was 1180 * Therefore we must send the barrier_ack after the barrier request was
1301 * completed. */ 1181 * completed. */
1302 switch (mdev->write_ordering) { 1182 switch (mdev->write_ordering) {
1303 case WO_bio_barrier:
1304 case WO_none: 1183 case WO_none:
1305 if (rv == FE_RECYCLED) 1184 if (rv == FE_RECYCLED)
1306 return TRUE; 1185 return TRUE;
1307 break; 1186
1187 /* receiver context, in the writeout path of the other node.
1188 * avoid potential distributed deadlock */
1189 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1190 if (epoch)
1191 break;
1192 else
1193 dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
1194 /* Fall through */
1308 1195
1309 case WO_bdev_flush: 1196 case WO_bdev_flush:
1310 case WO_drain_io: 1197 case WO_drain_io:
1311 if (rv == FE_STILL_LIVE) {
1312 set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags);
1313 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
1314 rv = drbd_flush_after_epoch(mdev, mdev->current_epoch);
1315 }
1316 if (rv == FE_RECYCLED)
1317 return TRUE;
1318
1319 /* The asender will send all the ACKs and barrier ACKs out, since
1320 all EEs moved from the active_ee to the done_ee. We need to
1321 provide a new epoch object for the EEs that come in soon */
1322 break;
1323 }
1324
1325 /* receiver context, in the writeout path of the other node.
1326 * avoid potential distributed deadlock */
1327 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1328 if (!epoch) {
1329 dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
1330 issue_flush = !test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags);
1331 drbd_wait_ee_list_empty(mdev, &mdev->active_ee); 1198 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
1332 if (issue_flush) { 1199 drbd_flush(mdev);
1333 rv = drbd_flush_after_epoch(mdev, mdev->current_epoch); 1200
1334 if (rv == FE_RECYCLED) 1201 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1335 return TRUE; 1202 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1203 if (epoch)
1204 break;
1336 } 1205 }
1337 1206
1338 drbd_wait_ee_list_empty(mdev, &mdev->done_ee); 1207 epoch = mdev->current_epoch;
1208 wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
1209
1210 D_ASSERT(atomic_read(&epoch->active) == 0);
1211 D_ASSERT(epoch->flags == 0);
1339 1212
1340 return TRUE; 1213 return TRUE;
1214 default:
1215 dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
1216 return FALSE;
1341 } 1217 }
1342 1218
1343 epoch->flags = 0; 1219 epoch->flags = 0;
@@ -1652,15 +1528,8 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1652{ 1528{
1653 struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; 1529 struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
1654 sector_t sector = e->sector; 1530 sector_t sector = e->sector;
1655 struct drbd_epoch *epoch;
1656 int ok = 1, pcmd; 1531 int ok = 1, pcmd;
1657 1532
1658 if (e->flags & EE_IS_BARRIER) {
1659 epoch = previous_epoch(mdev, e->epoch);
1660 if (epoch)
1661 drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE + (cancel ? EV_CLEANUP : 0));
1662 }
1663
1664 if (mdev->net_conf->wire_protocol == DRBD_PROT_C) { 1533 if (mdev->net_conf->wire_protocol == DRBD_PROT_C) {
1665 if (likely((e->flags & EE_WAS_ERROR) == 0)) { 1534 if (likely((e->flags & EE_WAS_ERROR) == 0)) {
1666 pcmd = (mdev->state.conn >= C_SYNC_SOURCE && 1535 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
@@ -1817,27 +1686,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
1817 e->epoch = mdev->current_epoch; 1686 e->epoch = mdev->current_epoch;
1818 atomic_inc(&e->epoch->epoch_size); 1687 atomic_inc(&e->epoch->epoch_size);
1819 atomic_inc(&e->epoch->active); 1688 atomic_inc(&e->epoch->active);
1820
1821 if (mdev->write_ordering == WO_bio_barrier && atomic_read(&e->epoch->epoch_size) == 1) {
1822 struct drbd_epoch *epoch;
1823 /* Issue a barrier if we start a new epoch, and the previous epoch
1824 was not a epoch containing a single request which already was
1825 a Barrier. */
1826 epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list);
1827 if (epoch == e->epoch) {
1828 set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
1829 rw |= REQ_HARDBARRIER;
1830 e->flags |= EE_IS_BARRIER;
1831 } else {
1832 if (atomic_read(&epoch->epoch_size) > 1 ||
1833 !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) {
1834 set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
1835 set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
1836 rw |= REQ_HARDBARRIER;
1837 e->flags |= EE_IS_BARRIER;
1838 }
1839 }
1840 }
1841 spin_unlock(&mdev->epoch_lock); 1689 spin_unlock(&mdev->epoch_lock);
1842 1690
1843 dp_flags = be32_to_cpu(p->dp_flags); 1691 dp_flags = be32_to_cpu(p->dp_flags);
@@ -1995,10 +1843,11 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
1995 break; 1843 break;
1996 } 1844 }
1997 1845
1998 if (mdev->state.pdsk == D_DISKLESS) { 1846 if (mdev->state.pdsk < D_INCONSISTENT) {
1999 /* In case we have the only disk of the cluster, */ 1847 /* In case we have the only disk of the cluster, */
2000 drbd_set_out_of_sync(mdev, e->sector, e->size); 1848 drbd_set_out_of_sync(mdev, e->sector, e->size);
2001 e->flags |= EE_CALL_AL_COMPLETE_IO; 1849 e->flags |= EE_CALL_AL_COMPLETE_IO;
1850 e->flags &= ~EE_MAY_SET_IN_SYNC;
2002 drbd_al_begin_io(mdev, e->sector); 1851 drbd_al_begin_io(mdev, e->sector);
2003 } 1852 }
2004 1853
@@ -3362,7 +3211,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3362 if (ns.conn == C_MASK) { 3211 if (ns.conn == C_MASK) {
3363 ns.conn = C_CONNECTED; 3212 ns.conn = C_CONNECTED;
3364 if (mdev->state.disk == D_NEGOTIATING) { 3213 if (mdev->state.disk == D_NEGOTIATING) {
3365 drbd_force_state(mdev, NS(disk, D_DISKLESS)); 3214 drbd_force_state(mdev, NS(disk, D_FAILED));
3366 } else if (peer_state.disk == D_NEGOTIATING) { 3215 } else if (peer_state.disk == D_NEGOTIATING) {
3367 dev_err(DEV, "Disk attach process on the peer node was aborted.\n"); 3216 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3368 peer_state.disk = D_DISKLESS; 3217 peer_state.disk = D_DISKLESS;