diff options
author | Philipp Reisner <philipp.reisner@linbit.com> | 2009-09-25 19:07:19 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2009-10-01 15:17:49 -0400 |
commit | b411b3637fa71fce9cf2acf0639009500f5892fe (patch) | |
tree | 6b88e5202e0f137fef50e95b0441bcafdbf91990 /drivers/block/drbd/drbd_req.h | |
parent | 1a35e0f6443f4266dad4c569c55c57a9032596fa (diff) |
The DRBD driver
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd/drbd_req.h')
-rw-r--r-- | drivers/block/drbd/drbd_req.h | 327 |
1 files changed, 327 insertions, 0 deletions
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h new file mode 100644 index 000000000000..d37ab57f1209 --- /dev/null +++ b/drivers/block/drbd/drbd_req.h | |||
@@ -0,0 +1,327 @@ | |||
1 | /* | ||
2 | drbd_req.h | ||
3 | |||
4 | This file is part of DRBD by Philipp Reisner and Lars Ellenberg. | ||
5 | |||
6 | Copyright (C) 2006-2008, LINBIT Information Technologies GmbH. | ||
7 | Copyright (C) 2006-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. | ||
8 | Copyright (C) 2006-2008, Philipp Reisner <philipp.reisner@linbit.com>. | ||
9 | |||
10 | DRBD is free software; you can redistribute it and/or modify | ||
11 | it under the terms of the GNU General Public License as published by | ||
12 | the Free Software Foundation; either version 2, or (at your option) | ||
13 | any later version. | ||
14 | |||
15 | DRBD is distributed in the hope that it will be useful, | ||
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | GNU General Public License for more details. | ||
19 | |||
20 | You should have received a copy of the GNU General Public License | ||
21 | along with drbd; see the file COPYING. If not, write to | ||
22 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
23 | */ | ||
24 | |||
25 | #ifndef _DRBD_REQ_H | ||
26 | #define _DRBD_REQ_H | ||
27 | |||
28 | #include <linux/autoconf.h> | ||
29 | #include <linux/module.h> | ||
30 | |||
31 | #include <linux/slab.h> | ||
32 | #include <linux/drbd.h> | ||
33 | #include "drbd_int.h" | ||
34 | #include "drbd_wrappers.h" | ||
35 | |||
36 | /* The request callbacks will be called in irq context by the IDE drivers, | ||
37 | and in Softirqs/Tasklets/BH context by the SCSI drivers, | ||
38 | and by the receiver and worker in kernel-thread context. | ||
39 | Try to get the locking right :) */ | ||
40 | |||
41 | /* | ||
42 | * Objects of type struct drbd_request do only exist on a R_PRIMARY node, and are | ||
43 | * associated with IO requests originating from the block layer above us. | ||
44 | * | ||
45 | * There are quite a few things that may happen to a drbd request | ||
46 | * during its lifetime. | ||
47 | * | ||
48 | * It will be created. | ||
49 | * It will be marked with the intention to be | ||
50 | * submitted to local disk and/or | ||
51 | * sent via the network. | ||
52 | * | ||
53 | * It has to be placed on the transfer log and other housekeeping lists, | ||
54 | * In case we have a network connection. | ||
55 | * | ||
56 | * It may be identified as a concurrent (write) request | ||
57 | * and be handled accordingly. | ||
58 | * | ||
59 | * It may be handed over to the local disk subsystem. | ||
60 | * It may be completed by the local disk subsystem, | ||
61 | * either successfully or with io-error. | ||
62 | * In case it is a READ request, and it failed locally, | ||
63 | * it may be retried remotely. | ||
64 | * | ||
65 | * It may be queued for sending. | ||
66 | * It may be handed over to the network stack, | ||
67 | * which may fail. | ||
68 | * It may be acknowledged by the "peer" according to the wire_protocol in use. | ||
69 | * this may be a negative ack. | ||
70 | * It may receive a faked ack when the network connection is lost and the | ||
71 | * transfer log is cleaned up. | ||
72 | * Sending may be canceled due to network connection loss. | ||
73 | * When it finally has outlived its time, | ||
74 | * corresponding dirty bits in the resync-bitmap may be cleared or set, | ||
75 | * it will be destroyed, | ||
76 | * and completion will be signalled to the originator, | ||
77 | * with or without "success". | ||
78 | */ | ||
79 | |||
/*
 * Things that can be reported for / applied to a drbd request during its
 * lifetime.  Enumerators are numbered implicitly, starting at 0, in the
 * order they are listed here.
 */
enum drbd_req_event {
	/* creation and the initial "intention" markings */
	created,
	to_be_send,
	to_be_submitted,

	/*
	 * Admittedly inconsistent: the next two are "actions" rather
	 * than "events".
	 */
	queue_for_net_write,
	queue_for_net_read,

	/* sending, and what the peer (or the loss of it) answers */
	send_canceled,
	send_failed,
	handed_over_to_network,
	connection_lost_while_pending,
	recv_acked_by_peer,
	write_acked_by_peer,
	write_acked_by_peer_and_sis,	/* ... and set_in_sync */
	conflict_discarded_by_peer,
	neg_acked,
	barrier_acked,			/* in protocol A and B */
	data_received,			/* (remote read) */

	/* completion outcomes */
	read_completed_with_error,
	read_ahead_completed_with_error,
	write_completed_with_error,
	completed_ok,

	nothing,			/* for tracing only */
};
109 | |||
/*
 * Encoding of request states, as bit numbers.
 *
 * For now we don't actually need that many bits.  Atomic bit operations
 * are not needed either: most of the time the connection state has to be
 * inspected and/or some lists have to be manipulated at the same time, so
 * the request lock should be held anyway.
 */
enum drbd_req_state_bits {
	/*
	 * Local part, bits 2..0:
	 *
	 *   210
	 *   000: no local possible
	 *   001: to be submitted
	 *   011: UNUSED, could be mapped to
	 *        "submitted, completion still pending"
	 *   110: completed ok
	 *   010: completed with error
	 */
	__RQ_LOCAL_PENDING,
	__RQ_LOCAL_COMPLETED,
	__RQ_LOCAL_OK,

	/*
	 * Network part, bits 7..3:
	 *
	 *   76543
	 *   00000: no network possible
	 *   00001: to be sent
	 *   00011: to be sent, on worker queue
	 *   00101: sent, expecting recv_ack (B) or write_ack (C)
	 *   11101: sent,
	 *          recv_ack (B) or implicit "ack" (A),
	 *          still waiting for the barrier ack.
	 *          master_bio may already be completed and invalidated.
	 *   11100: write_acked (C),
	 *          data_received (for remote read, any protocol)
	 *          or finally the barrier ack has arrived (B,A)...
	 *          request can be freed
	 *   01100: neg-acked (write, protocol C)
	 *          or neg-d-acked (read, any protocol)
	 *          or killed from the transfer log
	 *          during cleanup after connection loss
	 *          request can be freed
	 *   01000: canceled or send failed...
	 *          request can be freed
	 */

	/*
	 * As long as "SENT" is not set yet, this can still fail or be
	 * canceled.  Once "SENT" is set, we are still waiting for an Ack
	 * packet.  When this bit is cleared, the master_bio may be
	 * completed.  In (B,A) the request object may still linger on the
	 * transaction log until the corresponding barrier ack comes in.
	 */
	__RQ_NET_PENDING,

	/*
	 * A QUEUED request that is a WRITE is also registered in the
	 * transfer log.  The flag is currently needed to avoid conflicts
	 * between the worker canceling the request and tl_clear_barrier
	 * killing it from the transfer log.  The code should be
	 * restructured so that this conflict can no longer occur.
	 */
	__RQ_NET_QUEUED,

	/*
	 * Actually only means "handed over to the network stack".
	 *
	 * TODO: could potentially be dropped, because RQ_NET_SENT and
	 * ~RQ_NET_QUEUED mean similar things.  They are not exactly the
	 * same, though: before dropping it we must make sure we can still
	 * tell a request with a network part from one without, regardless
	 * of what happens to it.
	 */
	__RQ_NET_SENT,

	/*
	 * When set, the request may be freed (if RQ_NET_QUEUED is clear).
	 * Basically: the corresponding P_BARRIER_ACK was received.
	 */
	__RQ_NET_DONE,

	/*
	 * Whether we know (C) or pretend (B,A) that the write was
	 * successfully written on the peer.
	 */
	__RQ_NET_OK,

	/* peer called drbd_set_in_sync() for this write */
	__RQ_NET_SIS,

	/* must stay last: RQ_NET_MASK is derived from it */
	__RQ_NET_MAX,
};
187 | |||
/* Flag values for the local part: one bit per __RQ_LOCAL_* bit number. */
#define RQ_LOCAL_PENDING	(1UL << __RQ_LOCAL_PENDING)
#define RQ_LOCAL_COMPLETED	(1UL << __RQ_LOCAL_COMPLETED)
#define RQ_LOCAL_OK		(1UL << __RQ_LOCAL_OK)

/* every bit up to and including RQ_LOCAL_OK: 0x07 */
#define RQ_LOCAL_MASK		((RQ_LOCAL_OK << 1)-1)

/* Flag values for the network part: one bit per __RQ_NET_* bit number. */
#define RQ_NET_PENDING		(1UL << __RQ_NET_PENDING)
#define RQ_NET_QUEUED		(1UL << __RQ_NET_QUEUED)
#define RQ_NET_SENT		(1UL << __RQ_NET_SENT)
#define RQ_NET_DONE		(1UL << __RQ_NET_DONE)
#define RQ_NET_OK		(1UL << __RQ_NET_OK)
#define RQ_NET_SIS		(1UL << __RQ_NET_SIS)

/* every bit below __RQ_NET_MAX that is not a local bit: 0x1f8 */
#define RQ_NET_MASK		(((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)
203 | |||
204 | /* epoch entries */ | ||
205 | static inline | ||
206 | struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector) | ||
207 | { | ||
208 | BUG_ON(mdev->ee_hash_s == 0); | ||
209 | return mdev->ee_hash + | ||
210 | ((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s); | ||
211 | } | ||
212 | |||
213 | /* transfer log (drbd_request objects) */ | ||
214 | static inline | ||
215 | struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector) | ||
216 | { | ||
217 | BUG_ON(mdev->tl_hash_s == 0); | ||
218 | return mdev->tl_hash + | ||
219 | ((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s); | ||
220 | } | ||
221 | |||
222 | /* application reads (drbd_request objects) */ | ||
223 | static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector) | ||
224 | { | ||
225 | return mdev->app_reads_hash | ||
226 | + ((unsigned int)(sector) % APP_R_HSIZE); | ||
227 | } | ||
228 | |||
229 | /* when we receive the answer for a read request, | ||
230 | * verify that we actually know about it */ | ||
231 | static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev, | ||
232 | u64 id, sector_t sector) | ||
233 | { | ||
234 | struct hlist_head *slot = ar_hash_slot(mdev, sector); | ||
235 | struct hlist_node *n; | ||
236 | struct drbd_request *req; | ||
237 | |||
238 | hlist_for_each_entry(req, n, slot, colision) { | ||
239 | if ((unsigned long)req == (unsigned long)id) { | ||
240 | D_ASSERT(req->sector == sector); | ||
241 | return req; | ||
242 | } | ||
243 | } | ||
244 | return NULL; | ||
245 | } | ||
246 | |||
247 | static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, | ||
248 | struct bio *bio_src) | ||
249 | { | ||
250 | struct bio *bio; | ||
251 | struct drbd_request *req = | ||
252 | mempool_alloc(drbd_request_mempool, GFP_NOIO); | ||
253 | if (likely(req)) { | ||
254 | bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */ | ||
255 | |||
256 | req->rq_state = 0; | ||
257 | req->mdev = mdev; | ||
258 | req->master_bio = bio_src; | ||
259 | req->private_bio = bio; | ||
260 | req->epoch = 0; | ||
261 | req->sector = bio->bi_sector; | ||
262 | req->size = bio->bi_size; | ||
263 | req->start_time = jiffies; | ||
264 | INIT_HLIST_NODE(&req->colision); | ||
265 | INIT_LIST_HEAD(&req->tl_requests); | ||
266 | INIT_LIST_HEAD(&req->w.list); | ||
267 | |||
268 | bio->bi_private = req; | ||
269 | bio->bi_end_io = drbd_endio_pri; | ||
270 | bio->bi_next = NULL; | ||
271 | } | ||
272 | return req; | ||
273 | } | ||
274 | |||
275 | static inline void drbd_req_free(struct drbd_request *req) | ||
276 | { | ||
277 | mempool_free(req, drbd_request_mempool); | ||
278 | } | ||
279 | |||
280 | static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) | ||
281 | { | ||
282 | return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9))); | ||
283 | } | ||
284 | |||
/*
 * Short-lived temporary that lives on the stack: a bio together with the
 * error value to go with it.
 *
 * The error could be squirreled away in bio->bi_size or similar instead,
 * but that would be too ugly.
 */
struct bio_and_error {
	struct bio	*bio;
	int		error;
};
292 | |||
/*
 * Request state machinery implemented outside this header.  Each of these
 * passes results around in a caller-provided struct bio_and_error.
 */
void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m);
void __req_mod(struct drbd_request *req, enum drbd_req_event what,
	       struct bio_and_error *m);
void complete_master_bio(struct drbd_conf *mdev, struct bio_and_error *m);
299 | |||
300 | /* use this if you don't want to deal with calling complete_master_bio() | ||
301 | * outside the spinlock, e.g. when walking some list on cleanup. */ | ||
302 | static inline void _req_mod(struct drbd_request *req, enum drbd_req_event what) | ||
303 | { | ||
304 | struct drbd_conf *mdev = req->mdev; | ||
305 | struct bio_and_error m; | ||
306 | |||
307 | /* __req_mod possibly frees req, do not touch req after that! */ | ||
308 | __req_mod(req, what, &m); | ||
309 | if (m.bio) | ||
310 | complete_master_bio(mdev, &m); | ||
311 | } | ||
312 | |||
313 | /* completion of master bio is outside of spinlock. | ||
314 | * If you need it irqsave, do it your self! */ | ||
315 | static inline void req_mod(struct drbd_request *req, | ||
316 | enum drbd_req_event what) | ||
317 | { | ||
318 | struct drbd_conf *mdev = req->mdev; | ||
319 | struct bio_and_error m; | ||
320 | spin_lock_irq(&mdev->req_lock); | ||
321 | __req_mod(req, what, &m); | ||
322 | spin_unlock_irq(&mdev->req_lock); | ||
323 | |||
324 | if (m.bio) | ||
325 | complete_master_bio(mdev, &m); | ||
326 | } | ||
327 | #endif | ||