about summary refs log tree commit diff stats
path: root/drivers/block/drbd
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2014-03-20 06:19:22 -0400
committerPhilipp Reisner <philipp.reisner@linbit.com>2014-07-10 12:35:02 -0400
commit08d0dabf48ccf55e310b8ae9381858b477cabe2e (patch)
treef59b53661650af6df67e71e958ca981ec0e2cdd6 /drivers/block/drbd
parent5d0b17f1a29e8189d04aef447a3a53cfd05529b2 (diff)
drbd: application writes may set-in-sync in protocol != C
If "dirty" blocks are written to during resync, that brings them in-sync.
By explicitly requesting write-acks during resync even in protocol != C,
we now can actually respect this.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r--drivers/block/drbd/drbd_interval.h4
-rw-r--r--drivers/block/drbd/drbd_main.c5
-rw-r--r--drivers/block/drbd/drbd_receiver.c3
-rw-r--r--drivers/block/drbd/drbd_req.c68
4 files changed, 49 insertions, 31 deletions
diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h
index f38fcb00c10d..f210543f05f4 100644
--- a/drivers/block/drbd/drbd_interval.h
+++ b/drivers/block/drbd/drbd_interval.h
@@ -10,7 +10,9 @@ struct drbd_interval {
10 unsigned int size; /* size in bytes */ 10 unsigned int size; /* size in bytes */
11 sector_t end; /* highest interval end in subtree */ 11 sector_t end; /* highest interval end in subtree */
12 int local:1 /* local or remote request? */; 12 int local:1 /* local or remote request? */;
13 int waiting:1; 13 int waiting:1; /* someone is waiting for this to complete */
14 int completed:1; /* this has been completed already;
15 * ignore for conflict detection */
14}; 16};
15 17
16static inline void drbd_clear_interval(struct drbd_interval *i) 18static inline void drbd_clear_interval(struct drbd_interval *i)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 7c060243ae46..7ada5d363064 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1639,7 +1639,10 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
1639 if (peer_device->connection->agreed_pro_version >= 100) { 1639 if (peer_device->connection->agreed_pro_version >= 100) {
1640 if (req->rq_state & RQ_EXP_RECEIVE_ACK) 1640 if (req->rq_state & RQ_EXP_RECEIVE_ACK)
1641 dp_flags |= DP_SEND_RECEIVE_ACK; 1641 dp_flags |= DP_SEND_RECEIVE_ACK;
1642 if (req->rq_state & RQ_EXP_WRITE_ACK) 1642 /* During resync, request an explicit write ack,
1643 * even in protocol != C */
1644 if (req->rq_state & RQ_EXP_WRITE_ACK
1645 || (dp_flags & DP_MAY_SET_IN_SYNC))
1643 dp_flags |= DP_SEND_WRITE_ACK; 1646 dp_flags |= DP_SEND_WRITE_ACK;
1644 } 1647 }
1645 p->dp_flags = cpu_to_be32(dp_flags); 1648 p->dp_flags = cpu_to_be32(dp_flags);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index b89e6fb468c6..3a3c4893ea26 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1930,6 +1930,7 @@ static int e_end_block(struct drbd_work *w, int cancel)
1930 } 1930 }
1931 dec_unacked(device); 1931 dec_unacked(device);
1932 } 1932 }
1933
1933 /* we delete from the conflict detection hash _after_ we sent out the 1934 /* we delete from the conflict detection hash _after_ we sent out the
1934 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ 1935 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
1935 if (peer_req->flags & EE_IN_INTERVAL_TREE) { 1936 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
@@ -2156,6 +2157,8 @@ static int handle_write_conflicts(struct drbd_device *device,
2156 drbd_for_each_overlap(i, &device->write_requests, sector, size) { 2157 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
2157 if (i == &peer_req->i) 2158 if (i == &peer_req->i)
2158 continue; 2159 continue;
2160 if (i->completed)
2161 continue;
2159 2162
2160 if (!i->local) { 2163 if (!i->local) {
2161 /* 2164 /*
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 1ee735590b61..f07a724998ea 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -92,6 +92,19 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device,
92 return req; 92 return req;
93} 93}
94 94
95static void drbd_remove_request_interval(struct rb_root *root,
96 struct drbd_request *req)
97{
98 struct drbd_device *device = req->device;
99 struct drbd_interval *i = &req->i;
100
101 drbd_remove_interval(root, i);
102
103 /* Wake up any processes waiting for this request to complete. */
104 if (i->waiting)
105 wake_up(&device->misc_wait);
106}
107
95void drbd_req_destroy(struct kref *kref) 108void drbd_req_destroy(struct kref *kref)
96{ 109{
97 struct drbd_request *req = container_of(kref, struct drbd_request, kref); 110 struct drbd_request *req = container_of(kref, struct drbd_request, kref);
@@ -115,6 +128,20 @@ void drbd_req_destroy(struct kref *kref)
115 * here unconditionally */ 128 * here unconditionally */
116 list_del_init(&req->tl_requests); 129 list_del_init(&req->tl_requests);
117 130
131 /* finally remove the request from the conflict detection
132 * respective block_id verification interval tree. */
133 if (!drbd_interval_empty(&req->i)) {
134 struct rb_root *root;
135
136 if (s & RQ_WRITE)
137 root = &device->write_requests;
138 else
139 root = &device->read_requests;
140 drbd_remove_request_interval(root, req);
141 } else if (s & (RQ_NET_MASK & ~RQ_NET_DONE) && req->i.size != 0)
142 drbd_err(device, "drbd_req_destroy: Logic BUG: interval empty, but: rq_state=0x%x, sect=%llu, size=%u\n",
143 s, (unsigned long long)req->i.sector, req->i.size);
144
118 /* if it was a write, we may have to set the corresponding 145 /* if it was a write, we may have to set the corresponding
119 * bit(s) out-of-sync first. If it had a local part, we need to 146 * bit(s) out-of-sync first. If it had a local part, we need to
120 * release the reference to the activity log. */ 147 * release the reference to the activity log. */
@@ -188,19 +215,6 @@ void complete_master_bio(struct drbd_device *device,
188} 215}
189 216
190 217
191static void drbd_remove_request_interval(struct rb_root *root,
192 struct drbd_request *req)
193{
194 struct drbd_device *device = req->device;
195 struct drbd_interval *i = &req->i;
196
197 drbd_remove_interval(root, i);
198
199 /* Wake up any processes waiting for this request to complete. */
200 if (i->waiting)
201 wake_up(&device->misc_wait);
202}
203
204/* Helper for __req_mod(). 218/* Helper for __req_mod().
205 * Set m->bio to the master bio, if it is fit to be completed, 219 * Set m->bio to the master bio, if it is fit to be completed,
206 * or leave it alone (it is initialized to NULL in __req_mod), 220 * or leave it alone (it is initialized to NULL in __req_mod),
@@ -254,18 +268,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
254 ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK); 268 ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
255 error = PTR_ERR(req->private_bio); 269 error = PTR_ERR(req->private_bio);
256 270
257 /* remove the request from the conflict detection
258 * respective block_id verification hash */
259 if (!drbd_interval_empty(&req->i)) {
260 struct rb_root *root;
261
262 if (rw == WRITE)
263 root = &device->write_requests;
264 else
265 root = &device->read_requests;
266 drbd_remove_request_interval(root, req);
267 }
268
269 /* Before we can signal completion to the upper layers, 271 /* Before we can signal completion to the upper layers,
270 * we may need to close the current transfer log epoch. 272 * we may need to close the current transfer log epoch.
271 * We are within the request lock, so we can simply compare 273 * We are within the request lock, so we can simply compare
@@ -301,7 +303,15 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
301 m->error = ok ? 0 : (error ?: -EIO); 303 m->error = ok ? 0 : (error ?: -EIO);
302 m->bio = req->master_bio; 304 m->bio = req->master_bio;
303 req->master_bio = NULL; 305 req->master_bio = NULL;
306 /* We leave it in the tree, to be able to verify later
307 * write-acks in protocol != C during resync.
308 * But we mark it as "complete", so it won't be counted as
309 * conflict in a multi-primary setup. */
310 req->i.completed = true;
304 } 311 }
312
313 if (req->i.waiting)
314 wake_up(&device->misc_wait);
305} 315}
306 316
307static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) 317static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put)
@@ -660,12 +670,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
660 case WRITE_ACKED_BY_PEER_AND_SIS: 670 case WRITE_ACKED_BY_PEER_AND_SIS:
661 req->rq_state |= RQ_NET_SIS; 671 req->rq_state |= RQ_NET_SIS;
662 case WRITE_ACKED_BY_PEER: 672 case WRITE_ACKED_BY_PEER:
663 D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK); 673 /* Normal operation protocol C: successfully written on peer.
664 /* protocol C; successfully written on peer. 674 * During resync, even in protocol != C,
675 * we requested an explicit write ack anyways.
676 * Which means we cannot even assert anything here.
665 * Nothing more to do here. 677 * Nothing more to do here.
666 * We want to keep the tl in place for all protocols, to cater 678 * We want to keep the tl in place for all protocols, to cater
667 * for volatile write-back caches on lower level devices. */ 679 * for volatile write-back caches on lower level devices. */
668
669 goto ack_common; 680 goto ack_common;
670 case RECV_ACKED_BY_PEER: 681 case RECV_ACKED_BY_PEER:
671 D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK); 682 D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK);
@@ -673,7 +684,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
673 * see also notes above in HANDED_OVER_TO_NETWORK about 684 * see also notes above in HANDED_OVER_TO_NETWORK about
674 * protocol != C */ 685 * protocol != C */
675 ack_common: 686 ack_common:
676 D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
677 mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK); 687 mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
678 break; 688 break;
679 689