Diffstat (limited to 'drivers/block/drbd/drbd_req.h')
-rw-r--r-- | drivers/block/drbd/drbd_req.h | 326
1 files changed, 326 insertions, 0 deletions
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
new file mode 100644
index 000000000000..f22c1bc8ec7e
--- /dev/null
+++ b/drivers/block/drbd/drbd_req.h
@@ -0,0 +1,326 @@
/*
   drbd_req.h

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2006-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2006-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
   Copyright (C) 2006-2008, Philipp Reisner <philipp.reisner@linbit.com>.

   DRBD is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   DRBD is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#ifndef _DRBD_REQ_H
#define _DRBD_REQ_H

#include <linux/module.h>

#include <linux/slab.h>
#include <linux/drbd.h>
#include "drbd_int.h"
#include "drbd_wrappers.h"

/* The request callbacks will be called in irq context by the IDE drivers,
   and in Softirqs/Tasklets/BH context by the SCSI drivers,
   and by the receiver and worker in kernel-thread context.
   Try to get the locking right :) */

/*
 * Objects of type struct drbd_request only exist on a R_PRIMARY node, and are
 * associated with IO requests originating from the block layer above us.
 *
 * There are quite a few things that may happen to a drbd request
 * during its lifetime.
 *
 * It will be created.
 * It will be marked with the intention to be
 *   submitted to local disk and/or
 *   sent via the network.
 *
 * It has to be placed on the transfer log and other housekeeping lists,
 * in case we have a network connection.
 *
 * It may be identified as a concurrent (write) request
 * and be handled accordingly.
 *
 * It may be handed over to the local disk subsystem.
 * It may be completed by the local disk subsystem,
 * either successfully or with io-error.
 * In case it is a READ request, and it failed locally,
 * it may be retried remotely.
 *
 * It may be queued for sending.
 * It may be handed over to the network stack,
 * which may fail.
 * It may be acknowledged by the "peer" according to the wire_protocol in use.
 * This may be a negative ack.
 * It may receive a faked ack when the network connection is lost and the
 * transfer log is cleaned up.
 * Sending may be canceled due to network connection loss.
 * When it finally has outlived its time,
 * corresponding dirty bits in the resync-bitmap may be cleared or set,
 * it will be destroyed,
 * and completion will be signalled to the originator,
 * with or without "success".
 */

enum drbd_req_event {
	created,
	to_be_send,
	to_be_submitted,

	/* XXX yes, now I am inconsistent...
	 * these two are not "events" but "actions"
	 * oh, well... */
	queue_for_net_write,
	queue_for_net_read,

	send_canceled,
	send_failed,
	handed_over_to_network,
	connection_lost_while_pending,
	recv_acked_by_peer,
	write_acked_by_peer,
	write_acked_by_peer_and_sis, /* and set_in_sync */
	conflict_discarded_by_peer,
	neg_acked,
	barrier_acked, /* in protocol A and B */
	data_received, /* (remote read) */

	read_completed_with_error,
	read_ahead_completed_with_error,
	write_completed_with_error,
	completed_ok,
	nothing, /* for tracing only */
};
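
/*
 * Illustrative sketch (not part of the original header): for a protocol C
 * write that succeeds both locally and on the peer, the lifecycle described
 * above maps onto an event sequence roughly like
 *
 *	created
 *	to_be_submitted, to_be_send
 *	queue_for_net_write
 *	handed_over_to_network		(given to the network stack)
 *	completed_ok			(local disk completion)
 *	write_acked_by_peer		(peer confirmed the write)
 *
 * after which the master bio can be completed towards the upper layers and
 * the request, once off all housekeeping lists, can be freed.
 */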

/* encoding of request states for now.  we don't actually need that many bits.
 * we don't need to do atomic bit operations either, since most of the time we
 * need to look at the connection state and/or manipulate some lists at the
 * same time, so we should hold the request lock anyways.
 */
enum drbd_req_state_bits {
	/* 210
	 * 000: no local possible
	 * 001: to be submitted
	 *    UNUSED, we could map: 011: submitted, completion still pending
	 * 110: completed ok
	 * 010: completed with error
	 */
	__RQ_LOCAL_PENDING,
	__RQ_LOCAL_COMPLETED,
	__RQ_LOCAL_OK,

	/* 76543
	 * 00000: no network possible
	 * 00001: to be sent
	 * 00011: to be sent, on worker queue
	 * 00101: sent, expecting recv_ack (B) or write_ack (C)
	 * 11101: sent,
	 *        recv_ack (B) or implicit "ack" (A),
	 *        still waiting for the barrier ack.
	 *        master_bio may already be completed and invalidated.
	 * 11100: write_acked (C),
	 *        data_received (for remote read, any protocol)
	 *        or finally the barrier ack has arrived (B,A)...
	 *        request can be freed
	 * 01100: neg-acked (write, protocol C)
	 *        or neg-d-acked (read, any protocol)
	 *        or killed from the transfer log
	 *        during cleanup after connection loss
	 *        request can be freed
	 * 01000: canceled or send failed...
	 *        request can be freed
	 */

	/* if "SENT" is not set yet, this can still fail or be canceled.
	 * if "SENT" is set already, we still wait for an Ack packet.
	 * when cleared, the master_bio may be completed.
	 * in (B,A) the request object may still linger on the transfer log
	 * until the corresponding barrier ack comes in */
	__RQ_NET_PENDING,

	/* If it is QUEUED, and it is a WRITE, it is also registered in the
	 * transfer log.  Currently we need this flag to avoid conflicts between
	 * the worker canceling the request and tl_clear_barrier killing it from
	 * the transfer log.  We should restructure the code so this conflict
	 * no longer occurs. */
	__RQ_NET_QUEUED,

	/* well, actually only "handed over to the network stack".
	 *
	 * TODO can potentially be dropped because of the similar meaning
	 * of RQ_NET_SENT and ~RQ_NET_QUEUED.
	 * however it is not exactly the same.  before we drop it
	 * we must ensure that we can tell a request with a network part
	 * from a request without, regardless of what happens to it. */
	__RQ_NET_SENT,

	/* when set, the request may be freed (if RQ_NET_QUEUED is clear).
	 * basically this means the corresponding P_BARRIER_ACK was received */
	__RQ_NET_DONE,

	/* whether or not we know (C) or pretend (B,A) that the write
	 * was successfully written on the peer.
	 */
	__RQ_NET_OK,

	/* peer called drbd_set_in_sync() for this write */
	__RQ_NET_SIS,

	/* keep this last, it's for the RQ_NET_MASK */
	__RQ_NET_MAX,
};

#define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)
#define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)
#define RQ_LOCAL_OK        (1UL << __RQ_LOCAL_OK)

#define RQ_LOCAL_MASK      ((RQ_LOCAL_OK << 1)-1) /* 0x07 */

#define RQ_NET_PENDING     (1UL << __RQ_NET_PENDING)
#define RQ_NET_QUEUED      (1UL << __RQ_NET_QUEUED)
#define RQ_NET_SENT        (1UL << __RQ_NET_SENT)
#define RQ_NET_DONE        (1UL << __RQ_NET_DONE)
#define RQ_NET_OK          (1UL << __RQ_NET_OK)
#define RQ_NET_SIS         (1UL << __RQ_NET_SIS)

/* 0x1f8 */
#define RQ_NET_MASK        (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)
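
/*
 * Illustrative sketch, not part of the original header: the masks let the
 * local and network parts of rq_state be tested independently.  The helper
 * below is hypothetical; it merely restates the rule documented at
 * __RQ_NET_DONE (the network part is finished once RQ_NET_DONE is set and
 * the request is no longer queued for the worker).
 */
static inline int drbd_req_net_part_done_example(struct drbd_request *req)
{
	if ((req->rq_state & RQ_NET_MASK) == 0)
		return 1; /* request never had a network part */
	return (req->rq_state & (RQ_NET_DONE|RQ_NET_QUEUED)) == RQ_NET_DONE;
}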

/* epoch entries */
static inline
struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
{
	BUG_ON(mdev->ee_hash_s == 0);
	return mdev->ee_hash +
		((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s);
}

/* transfer log (drbd_request objects) */
static inline
struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector)
{
	BUG_ON(mdev->tl_hash_s == 0);
	return mdev->tl_hash +
		((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s);
}

/* application reads (drbd_request objects) */
static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector)
{
	return mdev->app_reads_hash
		+ ((unsigned int)(sector) % APP_R_HSIZE);
}

/* when we receive the answer for a read request,
 * verify that we actually know about it */
static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
	u64 id, sector_t sector)
{
	struct hlist_head *slot = ar_hash_slot(mdev, sector);
	struct hlist_node *n;
	struct drbd_request *req;

	hlist_for_each_entry(req, n, slot, colision) {
		if ((unsigned long)req == (unsigned long)id) {
			D_ASSERT(req->sector == sector);
			return req;
		}
	}
	return NULL;
}
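
/*
 * Illustrative sketch (hypothetical, not part of the original header):
 * the counterpart to the lookup above would be hashing a new application
 * read into its slot, via the request's colision node, before it is sent
 * to the peer, e.g.
 *
 *	hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector));
 *
 * done under mdev->req_lock, so that _ar_id_to_req() can find the request
 * again when the answer for that (id, sector) pair arrives.
 */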

static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
	struct bio *bio_src)
{
	struct bio *bio;
	struct drbd_request *req =
		mempool_alloc(drbd_request_mempool, GFP_NOIO);
	if (likely(req)) {
		bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */

		req->rq_state = 0;
		req->mdev = mdev;
		req->master_bio = bio_src;
		req->private_bio = bio;
		req->epoch = 0;
		req->sector = bio->bi_sector;
		req->size = bio->bi_size;
		req->start_time = jiffies;
		INIT_HLIST_NODE(&req->colision);
		INIT_LIST_HEAD(&req->tl_requests);
		INIT_LIST_HEAD(&req->w.list);

		bio->bi_private = req;
		bio->bi_end_io = drbd_endio_pri;
		bio->bi_next = NULL;
	}
	return req;
}
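
/*
 * Illustrative sketch (hypothetical, not part of the original header): a
 * make_request style caller would typically allocate the request, drive the
 * state machine with the intended actions, and submit the private (cloned)
 * bio, whose bi_end_io was set to drbd_endio_pri above, to the local disk,
 * roughly like
 *
 *	struct drbd_request *req = drbd_req_new(mdev, bio);
 *	if (!req)
 *		return -ENOMEM;
 *	... _req_mod(req, to_be_submitted) / to_be_send under req_lock ...
 *	generic_make_request(req->private_bio);
 *
 * error handling, remote queuing and the actual locking are left out here.
 */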

static inline void drbd_req_free(struct drbd_request *req)
{
	mempool_free(req, drbd_request_mempool);
}

static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
{
	return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
}
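
/*
 * Illustrative sketch (hypothetical, not part of the original header): how
 * overlaps() and tl_hash_slot() might be combined to look for a request in
 * the transfer log that conflicts with an incoming write.  Lengths are in
 * bytes, sectors are 512 byte units, hence the >>9 above.  A real
 * implementation would also have to consider neighbouring hash slots and
 * the epoch entry hash.
 */
static inline struct drbd_request *_tl_conflict_example(struct drbd_conf *mdev,
	sector_t sector, int size)
{
	struct hlist_head *slot = tl_hash_slot(mdev, sector);
	struct hlist_node *n;
	struct drbd_request *req;

	hlist_for_each_entry(req, n, slot, colision) {
		if (overlaps(req->sector, req->size, sector, size))
			return req; /* found a conflicting request */
	}
	return NULL;
}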

/* Short lived temporary struct on the stack.
 * We could squirrel the error to be returned into
 * bio->bi_size, or similar.  But that would be too ugly. */
struct bio_and_error {
	struct bio *bio;
	int error;
};

extern void _req_may_be_done(struct drbd_request *req,
		struct bio_and_error *m);
extern void __req_mod(struct drbd_request *req, enum drbd_req_event what,
		struct bio_and_error *m);
extern void complete_master_bio(struct drbd_conf *mdev,
		struct bio_and_error *m);

/* use this if you don't want to deal with calling complete_master_bio()
 * outside the spinlock, e.g. when walking some list on cleanup. */
static inline void _req_mod(struct drbd_request *req, enum drbd_req_event what)
{
	struct drbd_conf *mdev = req->mdev;
	struct bio_and_error m;

	/* __req_mod possibly frees req, do not touch req after that! */
	__req_mod(req, what, &m);
	if (m.bio)
		complete_master_bio(mdev, &m);
}

/* completion of master bio is outside of spinlock.
 * If you need it irqsave, do it yourself! */
static inline void req_mod(struct drbd_request *req,
		enum drbd_req_event what)
{
	struct drbd_conf *mdev = req->mdev;
	struct bio_and_error m;
	spin_lock_irq(&mdev->req_lock);
	__req_mod(req, what, &m);
	spin_unlock_irq(&mdev->req_lock);

	if (m.bio)
		complete_master_bio(mdev, &m);
}
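
/*
 * Usage sketch (not part of the original header): completion handlers and
 * other code running without mdev->req_lock held use req_mod(), which takes
 * the lock itself and completes the master bio after dropping it, e.g.
 *
 *	req_mod(req, completed_ok);
 *
 * Code that already holds the lock (e.g. while walking the transfer log on
 * cleanup) uses _req_mod() or __req_mod() and, for the latter, completes the
 * returned bio_and_error itself once the lock is dropped.
 */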
#endif