author | Philipp Reisner <philipp.reisner@linbit.com> | 2009-09-25 19:07:19 -0400
committer | Jens Axboe <jens.axboe@oracle.com> | 2009-10-01 15:17:49 -0400
commit | b411b3637fa71fce9cf2acf0639009500f5892fe (patch)
tree | 6b88e5202e0f137fef50e95b0441bcafdbf91990 /drivers/block/drbd/drbd_int.h
parent | 1a35e0f6443f4266dad4c569c55c57a9032596fa (diff)
The DRBD driver
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd/drbd_int.h')
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 2258 |
1 file changed, 2258 insertions, 0 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
new file mode 100644
index 000000000000..8da602e010bb
--- /dev/null
+++ b/drivers/block/drbd/drbd_int.h
@@ -0,0 +1,2258 @@
1 | /* | ||
2 | drbd_int.h | ||
3 | |||
4 | This file is part of DRBD by Philipp Reisner and Lars Ellenberg. | ||
5 | |||
6 | Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. | ||
7 | Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. | ||
8 | Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. | ||
9 | |||
10 | drbd is free software; you can redistribute it and/or modify | ||
11 | it under the terms of the GNU General Public License as published by | ||
12 | the Free Software Foundation; either version 2, or (at your option) | ||
13 | any later version. | ||
14 | |||
15 | drbd is distributed in the hope that it will be useful, | ||
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | GNU General Public License for more details. | ||
19 | |||
20 | You should have received a copy of the GNU General Public License | ||
21 | along with drbd; see the file COPYING. If not, write to | ||
22 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
23 | |||
24 | */ | ||
25 | |||
26 | #ifndef _DRBD_INT_H | ||
27 | #define _DRBD_INT_H | ||
28 | |||
29 | #include <linux/compiler.h> | ||
30 | #include <linux/types.h> | ||
31 | #include <linux/version.h> | ||
32 | #include <linux/list.h> | ||
33 | #include <linux/sched.h> | ||
34 | #include <linux/bitops.h> | ||
35 | #include <linux/slab.h> | ||
36 | #include <linux/crypto.h> | ||
37 | #include <linux/tcp.h> | ||
38 | #include <linux/mutex.h> | ||
39 | #include <linux/major.h> | ||
40 | #include <linux/blkdev.h> | ||
41 | #include <linux/genhd.h> | ||
42 | #include <net/tcp.h> | ||
43 | #include <linux/lru_cache.h> | ||
44 | |||
45 | #ifdef __CHECKER__ | ||
46 | # define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) | ||
47 | # define __protected_read_by(x) __attribute__((require_context(x,1,999,"read"))) | ||
48 | # define __protected_write_by(x) __attribute__((require_context(x,1,999,"write"))) | ||
49 | # define __must_hold(x) __attribute__((context(x,1,1), require_context(x,1,999,"call"))) | ||
50 | #else | ||
51 | # define __protected_by(x) | ||
52 | # define __protected_read_by(x) | ||
53 | # define __protected_write_by(x) | ||
54 | # define __must_hold(x) | ||
55 | #endif | ||
56 | |||
57 | #define __no_warn(lock, stmt) do { __acquire(lock); stmt; __release(lock); } while (0) | ||
58 | |||
59 | /* module parameter, defined in drbd_main.c */ | ||
60 | extern unsigned int minor_count; | ||
61 | extern int disable_sendpage; | ||
62 | extern int allow_oos; | ||
63 | extern unsigned int cn_idx; | ||
64 | |||
65 | #ifdef CONFIG_DRBD_FAULT_INJECTION | ||
66 | extern int enable_faults; | ||
67 | extern int fault_rate; | ||
68 | extern int fault_devs; | ||
69 | #endif | ||
70 | |||
71 | extern char usermode_helper[]; | ||
72 | |||
73 | |||
74 | #ifndef TRUE | ||
75 | #define TRUE 1 | ||
76 | #endif | ||
77 | #ifndef FALSE | ||
78 | #define FALSE 0 | ||
79 | #endif | ||
80 | |||
81 | /* I don't remember why XCPU ... | ||
82 | * This is used to wake the asender, | ||
83 | * and to interrupt the sending task | ||
84 | * on disconnect. | ||
85 | */ | ||
86 | #define DRBD_SIG SIGXCPU | ||
87 | |||
88 | /* This is used to stop/restart our threads. | ||
89 | * Cannot use SIGTERM nor SIGKILL, since these | ||
90 | * are sent out by init on runlevel changes | ||
91 | * I choose SIGHUP for now. | ||
92 | */ | ||
93 | #define DRBD_SIGKILL SIGHUP | ||
94 | |||
95 | /* All EEs on the free list should have ID_VACANT (== 0) | ||
96 | * freshly allocated EEs get !ID_VACANT (== 1) | ||
97 | * so if it says "cannot dereference null pointer at address 0x00000001", | ||
98 | * it is most likely one of these :( */ | ||
99 | |||
100 | #define ID_IN_SYNC (4711ULL) | ||
101 | #define ID_OUT_OF_SYNC (4712ULL) | ||
102 | |||
103 | #define ID_SYNCER (-1ULL) | ||
104 | #define ID_VACANT 0 | ||
105 | #define is_syncer_block_id(id) ((id) == ID_SYNCER) | ||
106 | |||
107 | struct drbd_conf; | ||
108 | |||
109 | |||
110 | /* to shorten dev_warn(DEV, "msg"); and relatives statements */ | ||
111 | #define DEV (disk_to_dev(mdev->vdisk)) | ||
112 | |||
113 | #define D_ASSERT(exp) if (!(exp)) \ | ||
114 | dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__) | ||
115 | |||
116 | #define ERR_IF(exp) if (({ \ | ||
117 | int _b = (exp) != 0; \ | ||
118 | if (_b) dev_err(DEV, "%s: (%s) in %s:%d\n", \ | ||
119 | __func__, #exp, __FILE__, __LINE__); \ | ||
120 | _b; \ | ||
121 | })) | ||
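For illustration, a hedged sketch of how these two macros are meant to be used at a call site; the function, its arguments and the checked conditions are invented for this example and are not part of the patch:

static void example_sanity_check(struct drbd_conf *mdev, sector_t sector, int size)
{
	/* D_ASSERT only logs a message; execution continues even if it fails */
	D_ASSERT(size > 0);

	/* ERR_IF already expands to an if (), so the error path follows it directly */
	ERR_IF(sector & 7)
		return;

	/* ... normal path ... */
}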
122 | |||
123 | /* Defines to control fault insertion */ | ||
124 | enum { | ||
125 | DRBD_FAULT_MD_WR = 0, /* meta data write */ | ||
126 | DRBD_FAULT_MD_RD = 1, /* read */ | ||
127 | DRBD_FAULT_RS_WR = 2, /* resync */ | ||
128 | DRBD_FAULT_RS_RD = 3, | ||
129 | DRBD_FAULT_DT_WR = 4, /* data */ | ||
130 | DRBD_FAULT_DT_RD = 5, | ||
131 | DRBD_FAULT_DT_RA = 6, /* data read ahead */ | ||
132 | DRBD_FAULT_BM_ALLOC = 7, /* bitmap allocation */ | ||
133 | DRBD_FAULT_AL_EE = 8, /* alloc ee */ | ||
134 | |||
135 | DRBD_FAULT_MAX, | ||
136 | }; | ||
137 | |||
138 | extern void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...); | ||
139 | |||
140 | #ifdef CONFIG_DRBD_FAULT_INJECTION | ||
141 | extern unsigned int | ||
142 | _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type); | ||
143 | static inline int | ||
144 | drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { | ||
145 | return fault_rate && | ||
146 | (enable_faults & (1<<type)) && | ||
147 | _drbd_insert_fault(mdev, type); | ||
148 | } | ||
149 | #define FAULT_ACTIVE(_m, _t) (drbd_insert_fault((_m), (_t))) | ||
150 | |||
151 | #else | ||
152 | #define FAULT_ACTIVE(_m, _t) (0) | ||
153 | #endif | ||
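A hedged sketch (not taken from this patch) of how a request path might consult FAULT_ACTIVE; with CONFIG_DRBD_FAULT_INJECTION disabled the macro compiles away to 0 and the fault branch is dead code. The surrounding function is hypothetical:

static void example_submit(struct drbd_conf *mdev, struct bio *bio, int rw)
{
	int type = (rw == WRITE) ? DRBD_FAULT_DT_WR : DRBD_FAULT_DT_RD;

	if (FAULT_ACTIVE(mdev, type))
		bio_endio(bio, -EIO);	/* simulate a failed request */
	else
		submit_bio(rw, bio);
}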
154 | |||
155 | /* integer division, round _UP_ to the next integer */ | ||
156 | #define div_ceil(A, B) ((A)/(B) + ((A)%(B) ? 1 : 0)) | ||
157 | /* usual integer division */ | ||
158 | #define div_floor(A, B) ((A)/(B)) | ||
159 | |||
160 | /* drbd_meta-data.c (still in drbd_main.c) */ | ||
161 | /* 4th incarnation of the disk layout. */ | ||
162 | #define DRBD_MD_MAGIC (DRBD_MAGIC+4) | ||
163 | |||
164 | extern struct drbd_conf **minor_table; | ||
165 | extern struct ratelimit_state drbd_ratelimit_state; | ||
166 | |||
167 | /* on the wire */ | ||
168 | enum drbd_packets { | ||
169 | /* receiver (data socket) */ | ||
170 | P_DATA = 0x00, | ||
171 | P_DATA_REPLY = 0x01, /* Response to P_DATA_REQUEST */ | ||
172 | P_RS_DATA_REPLY = 0x02, /* Response to P_RS_DATA_REQUEST */ | ||
173 | P_BARRIER = 0x03, | ||
174 | P_BITMAP = 0x04, | ||
175 | P_BECOME_SYNC_TARGET = 0x05, | ||
176 | P_BECOME_SYNC_SOURCE = 0x06, | ||
177 | P_UNPLUG_REMOTE = 0x07, /* Used at various times to hint the peer */ | ||
178 | P_DATA_REQUEST = 0x08, /* Used to ask for a data block */ | ||
179 | P_RS_DATA_REQUEST = 0x09, /* Used to ask for a data block for resync */ | ||
180 | P_SYNC_PARAM = 0x0a, | ||
181 | P_PROTOCOL = 0x0b, | ||
182 | P_UUIDS = 0x0c, | ||
183 | P_SIZES = 0x0d, | ||
184 | P_STATE = 0x0e, | ||
185 | P_SYNC_UUID = 0x0f, | ||
186 | P_AUTH_CHALLENGE = 0x10, | ||
187 | P_AUTH_RESPONSE = 0x11, | ||
188 | P_STATE_CHG_REQ = 0x12, | ||
189 | |||
190 | /* asender (meta socket) */ | ||
191 | P_PING = 0x13, | ||
192 | P_PING_ACK = 0x14, | ||
193 | P_RECV_ACK = 0x15, /* Used in protocol B */ | ||
194 | P_WRITE_ACK = 0x16, /* Used in protocol C */ | ||
195 | P_RS_WRITE_ACK = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */ | ||
196 | P_DISCARD_ACK = 0x18, /* Used in proto C, two-primaries conflict detection */ | ||
197 | P_NEG_ACK = 0x19, /* Sent if local disk is unusable */ | ||
198 | P_NEG_DREPLY = 0x1a, /* Local disk is broken... */ | ||
199 | P_NEG_RS_DREPLY = 0x1b, /* Local disk is broken... */ | ||
200 | P_BARRIER_ACK = 0x1c, | ||
201 | P_STATE_CHG_REPLY = 0x1d, | ||
202 | |||
203 | /* "new" commands, no longer fitting into the ordering scheme above */ | ||
204 | |||
205 | P_OV_REQUEST = 0x1e, /* data socket */ | ||
206 | P_OV_REPLY = 0x1f, | ||
207 | P_OV_RESULT = 0x20, /* meta socket */ | ||
208 | P_CSUM_RS_REQUEST = 0x21, /* data socket */ | ||
209 | P_RS_IS_IN_SYNC = 0x22, /* meta socket */ | ||
210 | P_SYNC_PARAM89 = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */ | ||
211 | P_COMPRESSED_BITMAP = 0x24, /* compressed or otherwise encoded bitmap transfer */ | ||
212 | |||
213 | P_MAX_CMD = 0x25, | ||
214 | P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ | ||
215 | P_MAX_OPT_CMD = 0x101, | ||
216 | |||
217 | /* special command ids for handshake */ | ||
218 | |||
219 | P_HAND_SHAKE_M = 0xfff1, /* First Packet on the MetaSock */ | ||
220 | P_HAND_SHAKE_S = 0xfff2, /* First Packet on the Socket */ | ||
221 | |||
222 | P_HAND_SHAKE = 0xfffe /* FIXED for the next century! */ | ||
223 | }; | ||
224 | |||
225 | static inline const char *cmdname(enum drbd_packets cmd) | ||
226 | { | ||
227 | /* THINK may need to become several global tables | ||
228 | * when we want to support more than | ||
229 | * one PRO_VERSION */ | ||
230 | static const char *cmdnames[] = { | ||
231 | [P_DATA] = "Data", | ||
232 | [P_DATA_REPLY] = "DataReply", | ||
233 | [P_RS_DATA_REPLY] = "RSDataReply", | ||
234 | [P_BARRIER] = "Barrier", | ||
235 | [P_BITMAP] = "ReportBitMap", | ||
236 | [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget", | ||
237 | [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource", | ||
238 | [P_UNPLUG_REMOTE] = "UnplugRemote", | ||
239 | [P_DATA_REQUEST] = "DataRequest", | ||
240 | [P_RS_DATA_REQUEST] = "RSDataRequest", | ||
241 | [P_SYNC_PARAM] = "SyncParam", | ||
242 | [P_SYNC_PARAM89] = "SyncParam89", | ||
243 | [P_PROTOCOL] = "ReportProtocol", | ||
244 | [P_UUIDS] = "ReportUUIDs", | ||
245 | [P_SIZES] = "ReportSizes", | ||
246 | [P_STATE] = "ReportState", | ||
247 | [P_SYNC_UUID] = "ReportSyncUUID", | ||
248 | [P_AUTH_CHALLENGE] = "AuthChallenge", | ||
249 | [P_AUTH_RESPONSE] = "AuthResponse", | ||
250 | [P_PING] = "Ping", | ||
251 | [P_PING_ACK] = "PingAck", | ||
252 | [P_RECV_ACK] = "RecvAck", | ||
253 | [P_WRITE_ACK] = "WriteAck", | ||
254 | [P_RS_WRITE_ACK] = "RSWriteAck", | ||
255 | [P_DISCARD_ACK] = "DiscardAck", | ||
256 | [P_NEG_ACK] = "NegAck", | ||
257 | [P_NEG_DREPLY] = "NegDReply", | ||
258 | [P_NEG_RS_DREPLY] = "NegRSDReply", | ||
259 | [P_BARRIER_ACK] = "BarrierAck", | ||
260 | [P_STATE_CHG_REQ] = "StateChgRequest", | ||
261 | [P_STATE_CHG_REPLY] = "StateChgReply", | ||
262 | [P_OV_REQUEST] = "OVRequest", | ||
263 | [P_OV_REPLY] = "OVReply", | ||
264 | [P_OV_RESULT] = "OVResult", | ||
265 | [P_MAX_CMD] = NULL, | ||
266 | }; | ||
267 | |||
268 | if (cmd == P_HAND_SHAKE_M) | ||
269 | return "HandShakeM"; | ||
270 | if (cmd == P_HAND_SHAKE_S) | ||
271 | return "HandShakeS"; | ||
272 | if (cmd == P_HAND_SHAKE) | ||
273 | return "HandShake"; | ||
274 | if (cmd >= P_MAX_CMD) | ||
275 | return "Unknown"; | ||
276 | return cmdnames[cmd]; | ||
277 | } | ||
278 | |||
279 | /* for sending/receiving the bitmap, | ||
280 | * possibly in some encoding scheme */ | ||
281 | struct bm_xfer_ctx { | ||
282 | /* "const" | ||
283 | * stores total bits and long words | ||
284 | * of the bitmap, so we don't need to | ||
285 | * call the accessor functions over and again. */ | ||
286 | unsigned long bm_bits; | ||
287 | unsigned long bm_words; | ||
288 | /* during xfer, current position within the bitmap */ | ||
289 | unsigned long bit_offset; | ||
290 | unsigned long word_offset; | ||
291 | |||
292 | /* statistics; index: (h->command == P_BITMAP) */ | ||
293 | unsigned packets[2]; | ||
294 | unsigned bytes[2]; | ||
295 | }; | ||
296 | |||
297 | extern void INFO_bm_xfer_stats(struct drbd_conf *mdev, | ||
298 | const char *direction, struct bm_xfer_ctx *c); | ||
299 | |||
300 | static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c) | ||
301 | { | ||
302 | /* word_offset counts "native long words" (32 or 64 bit), | ||
303 | * aligned at 64 bit. | ||
304 | * Encoded packet may end at an unaligned bit offset. | ||
305 | * In case a fallback clear text packet is transmitted in | ||
306 | * between, we adjust this offset back to the last 64bit | ||
307 | * aligned "native long word", which makes coding and decoding | ||
308 | * the plain text bitmap much more convenient. */ | ||
309 | #if BITS_PER_LONG == 64 | ||
310 | c->word_offset = c->bit_offset >> 6; | ||
311 | #elif BITS_PER_LONG == 32 | ||
312 | c->word_offset = c->bit_offset >> 5; | ||
313 | c->word_offset &= ~(1UL); | ||
314 | #else | ||
315 | # error "unsupported BITS_PER_LONG" | ||
316 | #endif | ||
317 | } | ||
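A worked example of the adjustment, with an illustrative bit offset that is not taken from the code:

/* Example: suppose an encoded packet ends at bit_offset = 100.
 *   64-bit longs: word_offset = 100 >> 6        = 1  (the word starting at bit 64)
 *   32-bit longs: word_offset = (100 >> 5) & ~1 = 2  (also starts at bit 64, byte 8)
 * Both builds therefore resume the plain-text fallback at the same
 * 64-bit aligned position, as the comment above describes. */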
318 | |||
319 | #ifndef __packed | ||
320 | #define __packed __attribute__((packed)) | ||
321 | #endif | ||
322 | |||
323 | /* This is the layout for a packet on the wire. | ||
324 | * The byteorder is the network byte order. | ||
325 | * (except block_id and barrier fields. | ||
326 | * these are pointers to local structs | ||
327 | * and have no relevance for the partner, | ||
328 | * which just echoes them as received.) | ||
329 | * | ||
330 | * NOTE that the payload starts at a long aligned offset, | ||
331 | * regardless of 32 or 64 bit arch! | ||
332 | */ | ||
333 | struct p_header { | ||
334 | u32 magic; | ||
335 | u16 command; | ||
336 | u16 length; /* bytes of data after this header */ | ||
337 | u8 payload[0]; | ||
338 | } __packed; | ||
339 | /* 8 bytes. packet FIXED for the next century! */ | ||
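A hedged sketch of filling this fixed 8-byte header in network byte order; DRBD_MAGIC comes from linux/drbd.h (it is also used for DRBD_MD_MAGIC above), and the helper name is invented for this illustration:

static inline void example_fill_header(struct p_header *h,
				       enum drbd_packets cmd, u16 payload_len)
{
	h->magic   = cpu_to_be32(DRBD_MAGIC);
	h->command = cpu_to_be16(cmd);
	h->length  = cpu_to_be16(payload_len);	/* bytes of data after this header */
}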
340 | |||
341 | /* | ||
342 | * short commands, packets without payload, plain p_header: | ||
343 | * P_PING | ||
344 | * P_PING_ACK | ||
345 | * P_BECOME_SYNC_TARGET | ||
346 | * P_BECOME_SYNC_SOURCE | ||
347 | * P_UNPLUG_REMOTE | ||
348 | */ | ||
349 | |||
350 | /* | ||
351 | * commands with out-of-struct payload: | ||
352 | * P_BITMAP (no additional fields) | ||
353 | * P_DATA, P_DATA_REPLY (see p_data) | ||
354 | * P_COMPRESSED_BITMAP (see receive_compressed_bitmap) | ||
355 | */ | ||
356 | |||
357 | /* these defines must not be changed without changing the protocol version */ | ||
358 | #define DP_HARDBARRIER 1 | ||
359 | #define DP_RW_SYNC 2 | ||
360 | #define DP_MAY_SET_IN_SYNC 4 | ||
361 | |||
362 | struct p_data { | ||
363 | struct p_header head; | ||
364 | u64 sector; /* 64 bits sector number */ | ||
365 | u64 block_id; /* to identify the request in protocol B&C */ | ||
366 | u32 seq_num; | ||
367 | u32 dp_flags; | ||
368 | } __packed; | ||
369 | |||
370 | /* | ||
371 | * commands which share a struct: | ||
372 | * p_block_ack: | ||
373 | * P_RECV_ACK (proto B), P_WRITE_ACK (proto C), | ||
374 | * P_DISCARD_ACK (proto C, two-primaries conflict detection) | ||
375 | * p_block_req: | ||
376 | * P_DATA_REQUEST, P_RS_DATA_REQUEST | ||
377 | */ | ||
378 | struct p_block_ack { | ||
379 | struct p_header head; | ||
380 | u64 sector; | ||
381 | u64 block_id; | ||
382 | u32 blksize; | ||
383 | u32 seq_num; | ||
384 | } __packed; | ||
385 | |||
386 | |||
387 | struct p_block_req { | ||
388 | struct p_header head; | ||
389 | u64 sector; | ||
390 | u64 block_id; | ||
391 | u32 blksize; | ||
392 | u32 pad; /* to multiple of 8 Byte */ | ||
393 | } __packed; | ||
394 | |||
395 | /* | ||
396 | * commands with their own struct for additional fields: | ||
397 | * P_HAND_SHAKE | ||
398 | * P_BARRIER | ||
399 | * P_BARRIER_ACK | ||
400 | * P_SYNC_PARAM | ||
401 | * ReportParams | ||
402 | */ | ||
403 | |||
404 | struct p_handshake { | ||
405 | struct p_header head; /* 8 bytes */ | ||
406 | u32 protocol_min; | ||
407 | u32 feature_flags; | ||
408 | u32 protocol_max; | ||
409 | |||
410 | /* should be more than enough for future enhancements | ||
411 | * for now, feature_flags and the reserverd array shall be zero. | ||
412 | */ | ||
413 | |||
414 | u32 _pad; | ||
415 | u64 reserverd[7]; | ||
416 | } __packed; | ||
417 | /* 80 bytes, FIXED for the next century */ | ||
418 | |||
419 | struct p_barrier { | ||
420 | struct p_header head; | ||
421 | u32 barrier; /* barrier number _handle_ only */ | ||
422 | u32 pad; /* to multiple of 8 Byte */ | ||
423 | } __packed; | ||
424 | |||
425 | struct p_barrier_ack { | ||
426 | struct p_header head; | ||
427 | u32 barrier; | ||
428 | u32 set_size; | ||
429 | } __packed; | ||
430 | |||
431 | struct p_rs_param { | ||
432 | struct p_header head; | ||
433 | u32 rate; | ||
434 | |||
435 | /* Since protocol version 88 and higher. */ | ||
436 | char verify_alg[0]; | ||
437 | } __packed; | ||
438 | |||
439 | struct p_rs_param_89 { | ||
440 | struct p_header head; | ||
441 | u32 rate; | ||
442 | /* protocol version 89: */ | ||
443 | char verify_alg[SHARED_SECRET_MAX]; | ||
444 | char csums_alg[SHARED_SECRET_MAX]; | ||
445 | } __packed; | ||
446 | |||
447 | struct p_protocol { | ||
448 | struct p_header head; | ||
449 | u32 protocol; | ||
450 | u32 after_sb_0p; | ||
451 | u32 after_sb_1p; | ||
452 | u32 after_sb_2p; | ||
453 | u32 want_lose; | ||
454 | u32 two_primaries; | ||
455 | |||
456 | /* Since protocol version 87 and higher. */ | ||
457 | char integrity_alg[0]; | ||
458 | |||
459 | } __packed; | ||
460 | |||
461 | struct p_uuids { | ||
462 | struct p_header head; | ||
463 | u64 uuid[UI_EXTENDED_SIZE]; | ||
464 | } __packed; | ||
465 | |||
466 | struct p_rs_uuid { | ||
467 | struct p_header head; | ||
468 | u64 uuid; | ||
469 | } __packed; | ||
470 | |||
471 | struct p_sizes { | ||
472 | struct p_header head; | ||
473 | u64 d_size; /* size of disk */ | ||
474 | u64 u_size; /* user requested size */ | ||
475 | u64 c_size; /* current exported size */ | ||
476 | u32 max_segment_size; /* Maximal size of a BIO */ | ||
477 | u32 queue_order_type; | ||
478 | } __packed; | ||
479 | |||
480 | struct p_state { | ||
481 | struct p_header head; | ||
482 | u32 state; | ||
483 | } __packed; | ||
484 | |||
485 | struct p_req_state { | ||
486 | struct p_header head; | ||
487 | u32 mask; | ||
488 | u32 val; | ||
489 | } __packed; | ||
490 | |||
491 | struct p_req_state_reply { | ||
492 | struct p_header head; | ||
493 | u32 retcode; | ||
494 | } __packed; | ||
495 | |||
496 | struct p_drbd06_param { | ||
497 | u64 size; | ||
498 | u32 state; | ||
499 | u32 blksize; | ||
500 | u32 protocol; | ||
501 | u32 version; | ||
502 | u32 gen_cnt[5]; | ||
503 | u32 bit_map_gen[5]; | ||
504 | } __packed; | ||
505 | |||
506 | struct p_discard { | ||
507 | struct p_header head; | ||
508 | u64 block_id; | ||
509 | u32 seq_num; | ||
510 | u32 pad; | ||
511 | } __packed; | ||
512 | |||
513 | /* Valid values for the encoding field. | ||
514 | * Bump proto version when changing this. */ | ||
515 | enum drbd_bitmap_code { | ||
516 | /* RLE_VLI_Bytes = 0, | ||
517 | * and other bit variants had been defined during | ||
518 | * algorithm evaluation. */ | ||
519 | RLE_VLI_Bits = 2, | ||
520 | }; | ||
521 | |||
522 | struct p_compressed_bm { | ||
523 | struct p_header head; | ||
524 | /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code | ||
525 | * (encoding & 0x80): polarity (set/unset) of first runlength | ||
526 | * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits | ||
527 | * used to pad up to head.length bytes | ||
528 | */ | ||
529 | u8 encoding; | ||
530 | |||
531 | u8 code[0]; | ||
532 | } __packed; | ||
533 | |||
534 | /* DCBP: Drbd Compressed Bitmap Packet ... */ | ||
535 | static inline enum drbd_bitmap_code | ||
536 | DCBP_get_code(struct p_compressed_bm *p) | ||
537 | { | ||
538 | return (enum drbd_bitmap_code)(p->encoding & 0x0f); | ||
539 | } | ||
540 | |||
541 | static inline void | ||
542 | DCBP_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code) | ||
543 | { | ||
544 | BUG_ON(code & ~0xf); | ||
545 | p->encoding = (p->encoding & ~0xf) | code; | ||
546 | } | ||
547 | |||
548 | static inline int | ||
549 | DCBP_get_start(struct p_compressed_bm *p) | ||
550 | { | ||
551 | return (p->encoding & 0x80) != 0; | ||
552 | } | ||
553 | |||
554 | static inline void | ||
555 | DCBP_set_start(struct p_compressed_bm *p, int set) | ||
556 | { | ||
557 | p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0); | ||
558 | } | ||
559 | |||
560 | static inline int | ||
561 | DCBP_get_pad_bits(struct p_compressed_bm *p) | ||
562 | { | ||
563 | return (p->encoding >> 4) & 0x7; | ||
564 | } | ||
565 | |||
566 | static inline void | ||
567 | DCBP_set_pad_bits(struct p_compressed_bm *p, int n) | ||
568 | { | ||
569 | BUG_ON(n & ~0x7); | ||
570 | p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4); | ||
571 | } | ||
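A hedged sketch of how an encoder could compose the encoding byte with the accessors above; the particular values are illustrative, not taken from the actual bitmap encoder:

static inline void example_fill_encoding(struct p_compressed_bm *p)
{
	p->encoding = 0;
	/* DCBP_set_pad_bits() rewrites bits 0..6, so set the padding first */
	DCBP_set_pad_bits(p, 3);	/* bits 4..6: trailing zero bits used as padding */
	DCBP_set_code(p, RLE_VLI_Bits);	/* bits 0..3: which encoding is used */
	DCBP_set_start(p, 1);		/* bit 7: the first run length counts set bits */
}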
572 | |||
573 | /* one bitmap packet, including the p_header, | ||
574 | * should fit within one _architecture independent_ page. | ||
575 | * so we need to use the fixed size 4KiB page size | ||
576 | * most architectures have used for a long time. | ||
577 | */ | ||
578 | #define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header)) | ||
579 | #define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long)) | ||
580 | #define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm)) | ||
581 | #if (PAGE_SIZE < 4096) | ||
582 | /* drbd_send_bitmap / receive_bitmap would break horribly */ | ||
583 | #error "PAGE_SIZE too small" | ||
584 | #endif | ||
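The resulting numbers, spelled out (the 8-byte header size is stated above; the rest is plain arithmetic):

/* BM_PACKET_PAYLOAD_BYTES = 4096 - 8 = 4088 bytes of bitmap data per packet,
 * i.e. BM_PACKET_WORDS = 511 longs on a 64-bit build (1022 on 32-bit),
 * while a compressed packet may carry up to 4096 - 9 = 4087 code bytes. */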
585 | |||
586 | union p_polymorph { | ||
587 | struct p_header header; | ||
588 | struct p_handshake handshake; | ||
589 | struct p_data data; | ||
590 | struct p_block_ack block_ack; | ||
591 | struct p_barrier barrier; | ||
592 | struct p_barrier_ack barrier_ack; | ||
593 | struct p_rs_param_89 rs_param_89; | ||
594 | struct p_protocol protocol; | ||
595 | struct p_sizes sizes; | ||
596 | struct p_uuids uuids; | ||
597 | struct p_state state; | ||
598 | struct p_req_state req_state; | ||
599 | struct p_req_state_reply req_state_reply; | ||
600 | struct p_block_req block_req; | ||
601 | } __packed; | ||
602 | |||
603 | /**********************************************************************/ | ||
604 | enum drbd_thread_state { | ||
605 | None, | ||
606 | Running, | ||
607 | Exiting, | ||
608 | Restarting | ||
609 | }; | ||
610 | |||
611 | struct drbd_thread { | ||
612 | spinlock_t t_lock; | ||
613 | struct task_struct *task; | ||
614 | struct completion stop; | ||
615 | enum drbd_thread_state t_state; | ||
616 | int (*function) (struct drbd_thread *); | ||
617 | struct drbd_conf *mdev; | ||
618 | int reset_cpu_mask; | ||
619 | }; | ||
620 | |||
621 | static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) | ||
622 | { | ||
623 | /* THINK testing the t_state seems to be uncritical in all cases | ||
624 | * (but thread_{start,stop}), so we can read it *without* the lock. | ||
625 | * --lge */ | ||
626 | |||
627 | smp_rmb(); | ||
628 | return thi->t_state; | ||
629 | } | ||
630 | |||
631 | |||
632 | /* | ||
633 | * Having this as the first member of a struct provides sort of "inheritance". | ||
634 | * "derived" structs can be "drbd_queue_work()"ed. | ||
635 | * The callback should know and cast back to the descendant struct. | ||
636 | * drbd_request and drbd_epoch_entry are descendants of drbd_work. | ||
637 | */ | ||
638 | struct drbd_work; | ||
639 | typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel); | ||
640 | struct drbd_work { | ||
641 | struct list_head list; | ||
642 | drbd_work_cb cb; | ||
643 | }; | ||
644 | |||
645 | struct drbd_tl_epoch; | ||
646 | struct drbd_request { | ||
647 | struct drbd_work w; | ||
648 | struct drbd_conf *mdev; | ||
649 | |||
650 | /* if local IO is not allowed, will be NULL. | ||
651 | * if local IO _is_ allowed, holds the locally submitted bio clone, | ||
652 | * or, after local IO completion, the ERR_PTR(error). | ||
653 | * see drbd_endio_pri(). */ | ||
654 | struct bio *private_bio; | ||
655 | |||
656 | struct hlist_node colision; | ||
657 | sector_t sector; | ||
658 | unsigned int size; | ||
659 | unsigned int epoch; /* barrier_nr */ | ||
660 | |||
661 | /* barrier_nr: used to check on "completion" whether this req was in | ||
662 | * the current epoch, and we therefore have to close it, | ||
663 | * starting a new epoch... | ||
664 | */ | ||
665 | |||
666 | /* up to here, the struct layout is identical to drbd_epoch_entry; | ||
667 | * we might be able to use that to our advantage... */ | ||
668 | |||
669 | struct list_head tl_requests; /* ring list in the transfer log */ | ||
670 | struct bio *master_bio; /* master bio pointer */ | ||
671 | unsigned long rq_state; /* see comments above _req_mod() */ | ||
672 | int seq_num; | ||
673 | unsigned long start_time; | ||
674 | }; | ||
675 | |||
676 | struct drbd_tl_epoch { | ||
677 | struct drbd_work w; | ||
678 | struct list_head requests; /* requests before */ | ||
679 | struct drbd_tl_epoch *next; /* pointer to the next barrier */ | ||
680 | unsigned int br_number; /* the barrier's identifier. */ | ||
681 | int n_req; /* number of requests attached before this barrier */ | ||
682 | }; | ||
683 | |||
684 | struct drbd_request; | ||
685 | |||
686 | /* These Tl_epoch_entries may be in one of 6 lists: | ||
687 | active_ee .. data packet being written | ||
688 | sync_ee .. syncer block being written | ||
689 | done_ee .. block written, need to send P_WRITE_ACK | ||
690 | read_ee .. [RS]P_DATA_REQUEST being read | ||
691 | */ | ||
692 | |||
693 | struct drbd_epoch { | ||
694 | struct list_head list; | ||
695 | unsigned int barrier_nr; | ||
696 | atomic_t epoch_size; /* increased on every request added. */ | ||
697 | atomic_t active; /* increased on every req. added, and dec on every finished. */ | ||
698 | unsigned long flags; | ||
699 | }; | ||
700 | |||
701 | /* drbd_epoch flag bits */ | ||
702 | enum { | ||
703 | DE_BARRIER_IN_NEXT_EPOCH_ISSUED, | ||
704 | DE_BARRIER_IN_NEXT_EPOCH_DONE, | ||
705 | DE_CONTAINS_A_BARRIER, | ||
706 | DE_HAVE_BARRIER_NUMBER, | ||
707 | DE_IS_FINISHING, | ||
708 | }; | ||
709 | |||
710 | enum epoch_event { | ||
711 | EV_PUT, | ||
712 | EV_GOT_BARRIER_NR, | ||
713 | EV_BARRIER_DONE, | ||
714 | EV_BECAME_LAST, | ||
715 | EV_TRACE_FLUSH, /* TRACE_ are not real events, only used for tracing */ | ||
716 | EV_TRACE_ADD_BARRIER, /* Doing the first write as a barrier write */ | ||
717 | EV_TRACE_SETTING_BI, /* Barrier is expressed with the first write of the next epoch */ | ||
718 | EV_TRACE_ALLOC, | ||
719 | EV_TRACE_FREE, | ||
720 | EV_CLEANUP = 32, /* used as flag */ | ||
721 | }; | ||
722 | |||
723 | struct drbd_epoch_entry { | ||
724 | struct drbd_work w; | ||
725 | struct drbd_conf *mdev; | ||
726 | struct bio *private_bio; | ||
727 | struct hlist_node colision; | ||
728 | sector_t sector; | ||
729 | unsigned int size; | ||
730 | struct drbd_epoch *epoch; | ||
731 | |||
732 | /* up to here, the struct layout is identical to drbd_request; | ||
733 | * we might be able to use that to our advantage... */ | ||
734 | |||
735 | unsigned int flags; | ||
736 | u64 block_id; | ||
737 | }; | ||
738 | |||
739 | struct drbd_wq_barrier { | ||
740 | struct drbd_work w; | ||
741 | struct completion done; | ||
742 | }; | ||
743 | |||
744 | struct digest_info { | ||
745 | int digest_size; | ||
746 | void *digest; | ||
747 | }; | ||
748 | |||
749 | /* ee flag bits */ | ||
750 | enum { | ||
751 | __EE_CALL_AL_COMPLETE_IO, | ||
752 | __EE_CONFLICT_PENDING, | ||
753 | __EE_MAY_SET_IN_SYNC, | ||
754 | __EE_IS_BARRIER, | ||
755 | }; | ||
756 | #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) | ||
757 | #define EE_CONFLICT_PENDING (1<<__EE_CONFLICT_PENDING) | ||
758 | #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) | ||
759 | #define EE_IS_BARRIER (1<<__EE_IS_BARRIER) | ||
760 | |||
761 | /* global flag bits */ | ||
762 | enum { | ||
763 | CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */ | ||
764 | SIGNAL_ASENDER, /* whether asender wants to be interrupted */ | ||
765 | SEND_PING, /* whether asender should send a ping asap */ | ||
766 | |||
767 | STOP_SYNC_TIMER, /* tell timer to cancel itself */ | ||
768 | UNPLUG_QUEUED, /* only relevant with kernel 2.4 */ | ||
769 | UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ | ||
770 | MD_DIRTY, /* current uuids and flags not yet on disk */ | ||
771 | DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ | ||
772 | USE_DEGR_WFC_T, /* degr-wfc-timeout instead of wfc-timeout. */ | ||
773 | CLUSTER_ST_CHANGE, /* Cluster wide state change going on... */ | ||
774 | CL_ST_CHG_SUCCESS, | ||
775 | CL_ST_CHG_FAIL, | ||
776 | CRASHED_PRIMARY, /* This node was a crashed primary. | ||
777 | * Gets cleared when the state.conn | ||
778 | * goes into C_CONNECTED state. */ | ||
779 | WRITE_BM_AFTER_RESYNC, /* A kmalloc() during resync failed */ | ||
780 | NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */ | ||
781 | CONSIDER_RESYNC, | ||
782 | |||
783 | MD_NO_BARRIER, /* meta data device does not support barriers, | ||
784 | so don't even try */ | ||
785 | SUSPEND_IO, /* suspend application io */ | ||
786 | BITMAP_IO, /* suspend application io; | ||
787 | once no more io in flight, start bitmap io */ | ||
788 | BITMAP_IO_QUEUED, /* Started bitmap IO */ | ||
789 | RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ | ||
790 | NET_CONGESTED, /* The data socket is congested */ | ||
791 | |||
792 | CONFIG_PENDING, /* serialization of (re)configuration requests. | ||
793 | * if set, also prevents the device from dying */ | ||
794 | DEVICE_DYING, /* device became unconfigured, | ||
795 | * but worker thread is still handling the cleanup. | ||
796 | * reconfiguring (nl_disk_conf, nl_net_conf) is disallowed, | ||
797 | * while this is set. */ | ||
798 | RESIZE_PENDING, /* Size change detected locally, waiting for the response from | ||
799 | * the peer, if it changed there as well. */ | ||
800 | }; | ||
801 | |||
802 | struct drbd_bitmap; /* opaque for drbd_conf */ | ||
803 | |||
804 | /* TODO sort members for performance | ||
805 | * MAYBE group them further */ | ||
806 | |||
807 | /* THINK maybe we actually want to use the default "event/%s" worker threads | ||
808 | * or similar in linux 2.6, which uses per cpu data and threads. | ||
809 | * | ||
810 | * To be general, this might need a spin_lock member. | ||
811 | * For now, please use the mdev->req_lock to protect list_head, | ||
812 | * see drbd_queue_work below. | ||
813 | */ | ||
814 | struct drbd_work_queue { | ||
815 | struct list_head q; | ||
816 | struct semaphore s; /* producers up it, worker down()s it */ | ||
817 | spinlock_t q_lock; /* to protect the list. */ | ||
818 | }; | ||
819 | |||
820 | struct drbd_socket { | ||
821 | struct drbd_work_queue work; | ||
822 | struct mutex mutex; | ||
823 | struct socket *socket; | ||
824 | /* this way we get our | ||
825 | * send/receive buffers off the stack */ | ||
826 | union p_polymorph sbuf; | ||
827 | union p_polymorph rbuf; | ||
828 | }; | ||
829 | |||
830 | struct drbd_md { | ||
831 | u64 md_offset; /* sector offset to 'super' block */ | ||
832 | |||
833 | u64 la_size_sect; /* last agreed size, unit sectors */ | ||
834 | u64 uuid[UI_SIZE]; | ||
835 | u64 device_uuid; | ||
836 | u32 flags; | ||
837 | u32 md_size_sect; | ||
838 | |||
839 | s32 al_offset; /* signed relative sector offset to al area */ | ||
840 | s32 bm_offset; /* signed relative sector offset to bitmap */ | ||
841 | |||
842 | /* u32 al_nr_extents; important for restoring the AL | ||
843 | * is stored into sync_conf.al_extents, which in turn | ||
844 | * gets applied to act_log->nr_elements | ||
845 | */ | ||
846 | }; | ||
847 | |||
848 | /* for sync_conf and other types... */ | ||
849 | #define NL_PACKET(name, number, fields) struct name { fields }; | ||
850 | #define NL_INTEGER(pn,pr,member) int member; | ||
851 | #define NL_INT64(pn,pr,member) __u64 member; | ||
852 | #define NL_BIT(pn,pr,member) unsigned member:1; | ||
853 | #define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len; | ||
854 | #include "linux/drbd_nl.h" | ||
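To illustrate the x-macro trick above: with these definitions in place, a (hypothetical) packet description in linux/drbd_nl.h expands into a plain struct. The field names below are invented; only the expansion mechanism is the point:

/*   NL_PACKET(example_conf, 1,
 *	NL_INTEGER(1, 1, timeout)
 *	NL_BIT(2, 1, use_checksums)
 *	NL_STRING(3, 1, name, 32)
 *   )
 * expands (the pn/pr arguments are ignored by these definitions) to:
 *   struct example_conf {
 *	int timeout;
 *	unsigned use_checksums:1;
 *	unsigned char name[32]; int name_len;
 *   };
 * This is how struct syncer_conf, disk_conf and net_conf, used further below,
 * get their layout from linux/drbd_nl.h. */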
855 | |||
856 | struct drbd_backing_dev { | ||
857 | struct block_device *backing_bdev; | ||
858 | struct block_device *md_bdev; | ||
859 | struct file *lo_file; | ||
860 | struct file *md_file; | ||
861 | struct drbd_md md; | ||
862 | struct disk_conf dc; /* The user provided config... */ | ||
863 | sector_t known_size; /* last known size of that backing device */ | ||
864 | }; | ||
865 | |||
866 | struct drbd_md_io { | ||
867 | struct drbd_conf *mdev; | ||
868 | struct completion event; | ||
869 | int error; | ||
870 | }; | ||
871 | |||
872 | struct bm_io_work { | ||
873 | struct drbd_work w; | ||
874 | char *why; | ||
875 | int (*io_fn)(struct drbd_conf *mdev); | ||
876 | void (*done)(struct drbd_conf *mdev, int rv); | ||
877 | }; | ||
878 | |||
879 | enum write_ordering_e { | ||
880 | WO_none, | ||
881 | WO_drain_io, | ||
882 | WO_bdev_flush, | ||
883 | WO_bio_barrier | ||
884 | }; | ||
885 | |||
886 | struct drbd_conf { | ||
887 | /* things that are stored as / read from meta data on disk */ | ||
888 | unsigned long flags; | ||
889 | |||
890 | /* configured by drbdsetup */ | ||
891 | struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ | ||
892 | struct syncer_conf sync_conf; | ||
893 | struct drbd_backing_dev *ldev __protected_by(local); | ||
894 | |||
895 | sector_t p_size; /* partner's disk size */ | ||
896 | struct request_queue *rq_queue; | ||
897 | struct block_device *this_bdev; | ||
898 | struct gendisk *vdisk; | ||
899 | |||
900 | struct drbd_socket data; /* data/barrier/cstate/parameter packets */ | ||
901 | struct drbd_socket meta; /* ping/ack (metadata) packets */ | ||
902 | int agreed_pro_version; /* actually used protocol version */ | ||
903 | unsigned long last_received; /* in jiffies, either socket */ | ||
904 | unsigned int ko_count; | ||
905 | struct drbd_work resync_work, | ||
906 | unplug_work, | ||
907 | md_sync_work; | ||
908 | struct timer_list resync_timer; | ||
909 | struct timer_list md_sync_timer; | ||
910 | |||
911 | /* Used after attach while negotiating new disk state. */ | ||
912 | union drbd_state new_state_tmp; | ||
913 | |||
914 | union drbd_state state; | ||
915 | wait_queue_head_t misc_wait; | ||
916 | wait_queue_head_t state_wait; /* upon each state change. */ | ||
917 | unsigned int send_cnt; | ||
918 | unsigned int recv_cnt; | ||
919 | unsigned int read_cnt; | ||
920 | unsigned int writ_cnt; | ||
921 | unsigned int al_writ_cnt; | ||
922 | unsigned int bm_writ_cnt; | ||
923 | atomic_t ap_bio_cnt; /* Requests we need to complete */ | ||
924 | atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */ | ||
925 | atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ | ||
926 | atomic_t unacked_cnt; /* Need to send replies for */ | ||
927 | atomic_t local_cnt; /* Waiting for local completion */ | ||
928 | atomic_t net_cnt; /* Users of net_conf */ | ||
929 | spinlock_t req_lock; | ||
930 | struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */ | ||
931 | struct drbd_tl_epoch *newest_tle; | ||
932 | struct drbd_tl_epoch *oldest_tle; | ||
933 | struct list_head out_of_sequence_requests; | ||
934 | struct hlist_head *tl_hash; | ||
935 | unsigned int tl_hash_s; | ||
936 | |||
937 | /* blocks to sync in this run [unit BM_BLOCK_SIZE] */ | ||
938 | unsigned long rs_total; | ||
939 | /* number of sync IOs that failed in this run */ | ||
940 | unsigned long rs_failed; | ||
941 | /* Syncer's start time [unit jiffies] */ | ||
942 | unsigned long rs_start; | ||
943 | /* cumulated time in PausedSyncX state [unit jiffies] */ | ||
944 | unsigned long rs_paused; | ||
945 | /* block not up-to-date at mark [unit BM_BLOCK_SIZE] */ | ||
946 | unsigned long rs_mark_left; | ||
947 | /* mark's time [unit jiffies] */ | ||
948 | unsigned long rs_mark_time; | ||
949 | /* skipped because csum was equal [unit BM_BLOCK_SIZE] */ | ||
950 | unsigned long rs_same_csum; | ||
951 | |||
952 | /* where does the admin want us to start? (sector) */ | ||
953 | sector_t ov_start_sector; | ||
954 | /* where are we now? (sector) */ | ||
955 | sector_t ov_position; | ||
956 | /* Start sector of out of sync range (to merge printk reporting). */ | ||
957 | sector_t ov_last_oos_start; | ||
958 | /* size of out-of-sync range in sectors. */ | ||
959 | sector_t ov_last_oos_size; | ||
960 | unsigned long ov_left; /* in bits */ | ||
961 | struct crypto_hash *csums_tfm; | ||
962 | struct crypto_hash *verify_tfm; | ||
963 | |||
964 | struct drbd_thread receiver; | ||
965 | struct drbd_thread worker; | ||
966 | struct drbd_thread asender; | ||
967 | struct drbd_bitmap *bitmap; | ||
968 | unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */ | ||
969 | |||
970 | /* Used to track operations of resync... */ | ||
971 | struct lru_cache *resync; | ||
972 | /* Number of locked elements in resync LRU */ | ||
973 | unsigned int resync_locked; | ||
974 | /* resync extent number waiting for application requests */ | ||
975 | unsigned int resync_wenr; | ||
976 | |||
977 | int open_cnt; | ||
978 | u64 *p_uuid; | ||
979 | struct drbd_epoch *current_epoch; | ||
980 | spinlock_t epoch_lock; | ||
981 | unsigned int epochs; | ||
982 | enum write_ordering_e write_ordering; | ||
983 | struct list_head active_ee; /* IO in progress */ | ||
984 | struct list_head sync_ee; /* IO in progress */ | ||
985 | struct list_head done_ee; /* send ack */ | ||
986 | struct list_head read_ee; /* IO in progress */ | ||
987 | struct list_head net_ee; /* zero-copy network send in progress */ | ||
988 | struct hlist_head *ee_hash; /* is protected by req_lock! */ | ||
989 | unsigned int ee_hash_s; | ||
990 | |||
991 | /* this one is protected by ee_lock, single thread */ | ||
992 | struct drbd_epoch_entry *last_write_w_barrier; | ||
993 | |||
994 | int next_barrier_nr; | ||
995 | struct hlist_head *app_reads_hash; /* is protected by req_lock */ | ||
996 | struct list_head resync_reads; | ||
997 | atomic_t pp_in_use; | ||
998 | wait_queue_head_t ee_wait; | ||
999 | struct page *md_io_page; /* one page buffer for md_io */ | ||
1000 | struct page *md_io_tmpp; /* for logical_block_size != 512 */ | ||
1001 | struct mutex md_io_mutex; /* protects the md_io_buffer */ | ||
1002 | spinlock_t al_lock; | ||
1003 | wait_queue_head_t al_wait; | ||
1004 | struct lru_cache *act_log; /* activity log */ | ||
1005 | unsigned int al_tr_number; | ||
1006 | int al_tr_cycle; | ||
1007 | int al_tr_pos; /* position of the next transaction in the journal */ | ||
1008 | struct crypto_hash *cram_hmac_tfm; | ||
1009 | struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */ | ||
1010 | struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ | ||
1011 | void *int_dig_out; | ||
1012 | void *int_dig_in; | ||
1013 | void *int_dig_vv; | ||
1014 | wait_queue_head_t seq_wait; | ||
1015 | atomic_t packet_seq; | ||
1016 | unsigned int peer_seq; | ||
1017 | spinlock_t peer_seq_lock; | ||
1018 | unsigned int minor; | ||
1019 | unsigned long comm_bm_set; /* communicated number of set bits. */ | ||
1020 | cpumask_var_t cpu_mask; | ||
1021 | struct bm_io_work bm_io_work; | ||
1022 | u64 ed_uuid; /* UUID of the exposed data */ | ||
1023 | struct mutex state_mutex; | ||
1024 | char congestion_reason; /* Why we were congested... */ | ||
1025 | }; | ||
1026 | |||
1027 | static inline struct drbd_conf *minor_to_mdev(unsigned int minor) | ||
1028 | { | ||
1029 | struct drbd_conf *mdev; | ||
1030 | |||
1031 | mdev = minor < minor_count ? minor_table[minor] : NULL; | ||
1032 | |||
1033 | return mdev; | ||
1034 | } | ||
1035 | |||
1036 | static inline unsigned int mdev_to_minor(struct drbd_conf *mdev) | ||
1037 | { | ||
1038 | return mdev->minor; | ||
1039 | } | ||
1040 | |||
1041 | /* returns 1 if it was successful, | ||
1042 | * returns 0 if there was no data socket. | ||
1043 | * so wherever you are going to use the data.socket, e.g. do | ||
1044 | * if (!drbd_get_data_sock(mdev)) | ||
1045 | * return 0; | ||
1046 | * CODE(); | ||
1047 | * drbd_put_data_sock(mdev); | ||
1048 | */ | ||
1049 | static inline int drbd_get_data_sock(struct drbd_conf *mdev) | ||
1050 | { | ||
1051 | mutex_lock(&mdev->data.mutex); | ||
1052 | /* drbd_disconnect() could have called drbd_free_sock() | ||
1053 | * while we were waiting in down()... */ | ||
1054 | if (unlikely(mdev->data.socket == NULL)) { | ||
1055 | mutex_unlock(&mdev->data.mutex); | ||
1056 | return 0; | ||
1057 | } | ||
1058 | return 1; | ||
1059 | } | ||
1060 | |||
1061 | static inline void drbd_put_data_sock(struct drbd_conf *mdev) | ||
1062 | { | ||
1063 | mutex_unlock(&mdev->data.mutex); | ||
1064 | } | ||
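A hedged example following the usage pattern spelled out in the comment above; drbd_send() is declared further down in this header, and the helper name here is invented:

static inline int example_send_on_data_sock(struct drbd_conf *mdev,
					    void *buf, size_t size)
{
	int sent;

	if (!drbd_get_data_sock(mdev))
		return 0;			/* no data socket: nothing sent */
	sent = drbd_send(mdev, mdev->data.socket, buf, size, 0);
	drbd_put_data_sock(mdev);		/* releases data.mutex again */
	return sent;
}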
1065 | |||
1066 | /* | ||
1067 | * function declarations | ||
1068 | *************************/ | ||
1069 | |||
1070 | /* drbd_main.c */ | ||
1071 | |||
1072 | enum chg_state_flags { | ||
1073 | CS_HARD = 1, | ||
1074 | CS_VERBOSE = 2, | ||
1075 | CS_WAIT_COMPLETE = 4, | ||
1076 | CS_SERIALIZE = 8, | ||
1077 | CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, | ||
1078 | }; | ||
1079 | |||
1080 | extern void drbd_init_set_defaults(struct drbd_conf *mdev); | ||
1081 | extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, | ||
1082 | union drbd_state mask, union drbd_state val); | ||
1083 | extern void drbd_force_state(struct drbd_conf *, union drbd_state, | ||
1084 | union drbd_state); | ||
1085 | extern int _drbd_request_state(struct drbd_conf *, union drbd_state, | ||
1086 | union drbd_state, enum chg_state_flags); | ||
1087 | extern int __drbd_set_state(struct drbd_conf *, union drbd_state, | ||
1088 | enum chg_state_flags, struct completion *done); | ||
1089 | extern void print_st_err(struct drbd_conf *, union drbd_state, | ||
1090 | union drbd_state, int); | ||
1091 | extern int drbd_thread_start(struct drbd_thread *thi); | ||
1092 | extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); | ||
1093 | #ifdef CONFIG_SMP | ||
1094 | extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev); | ||
1095 | extern void drbd_calc_cpu_mask(struct drbd_conf *mdev); | ||
1096 | #else | ||
1097 | #define drbd_thread_current_set_cpu(A) ({}) | ||
1098 | #define drbd_calc_cpu_mask(A) ({}) | ||
1099 | #endif | ||
1100 | extern void drbd_free_resources(struct drbd_conf *mdev); | ||
1101 | extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, | ||
1102 | unsigned int set_size); | ||
1103 | extern void tl_clear(struct drbd_conf *mdev); | ||
1104 | extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); | ||
1105 | extern void drbd_free_sock(struct drbd_conf *mdev); | ||
1106 | extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, | ||
1107 | void *buf, size_t size, unsigned msg_flags); | ||
1108 | extern int drbd_send_protocol(struct drbd_conf *mdev); | ||
1109 | extern int drbd_send_uuids(struct drbd_conf *mdev); | ||
1110 | extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); | ||
1111 | extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val); | ||
1112 | extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply); | ||
1113 | extern int _drbd_send_state(struct drbd_conf *mdev); | ||
1114 | extern int drbd_send_state(struct drbd_conf *mdev); | ||
1115 | extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, | ||
1116 | enum drbd_packets cmd, struct p_header *h, | ||
1117 | size_t size, unsigned msg_flags); | ||
1118 | #define USE_DATA_SOCKET 1 | ||
1119 | #define USE_META_SOCKET 0 | ||
1120 | extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, | ||
1121 | enum drbd_packets cmd, struct p_header *h, | ||
1122 | size_t size); | ||
1123 | extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, | ||
1124 | char *data, size_t size); | ||
1125 | extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); | ||
1126 | extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, | ||
1127 | u32 set_size); | ||
1128 | extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, | ||
1129 | struct drbd_epoch_entry *e); | ||
1130 | extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, | ||
1131 | struct p_block_req *rp); | ||
1132 | extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, | ||
1133 | struct p_data *dp); | ||
1134 | extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, | ||
1135 | sector_t sector, int blksize, u64 block_id); | ||
1136 | extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, | ||
1137 | struct drbd_epoch_entry *e); | ||
1138 | extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); | ||
1139 | extern int _drbd_send_barrier(struct drbd_conf *mdev, | ||
1140 | struct drbd_tl_epoch *barrier); | ||
1141 | extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, | ||
1142 | sector_t sector, int size, u64 block_id); | ||
1143 | extern int drbd_send_drequest_csum(struct drbd_conf *mdev, | ||
1144 | sector_t sector, int size, | ||
1145 | void *digest, int digest_size, | ||
1146 | enum drbd_packets cmd); | ||
1147 | extern int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size); | ||
1148 | |||
1149 | extern int drbd_send_bitmap(struct drbd_conf *mdev); | ||
1150 | extern int _drbd_send_bitmap(struct drbd_conf *mdev); | ||
1151 | extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode); | ||
1152 | extern void drbd_free_bc(struct drbd_backing_dev *ldev); | ||
1153 | extern void drbd_mdev_cleanup(struct drbd_conf *mdev); | ||
1154 | |||
1155 | /* drbd_meta-data.c (still in drbd_main.c) */ | ||
1156 | extern void drbd_md_sync(struct drbd_conf *mdev); | ||
1157 | extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); | ||
1158 | /* maybe define them below as inline? */ | ||
1159 | extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); | ||
1160 | extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); | ||
1161 | extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); | ||
1162 | extern void _drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); | ||
1163 | extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local); | ||
1164 | extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local); | ||
1165 | extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local); | ||
1166 | extern int drbd_md_test_flag(struct drbd_backing_dev *, int); | ||
1167 | extern void drbd_md_mark_dirty(struct drbd_conf *mdev); | ||
1168 | extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, | ||
1169 | int (*io_fn)(struct drbd_conf *), | ||
1170 | void (*done)(struct drbd_conf *, int), | ||
1171 | char *why); | ||
1172 | extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); | ||
1173 | extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); | ||
1174 | extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why); | ||
1175 | |||
1176 | |||
1177 | /* Meta data layout | ||
1178 | We reserve a 128MB Block (4k aligned) | ||
1179 | * either at the end of the backing device | ||
1180 | * or on a separate meta data device. */ | ||
1181 | |||
1182 | #define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */ | ||
1183 | /* The following numbers are sectors */ | ||
1184 | #define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */ | ||
1185 | #define MD_AL_MAX_SIZE 64 /* = 32 kb LOG ~ 3776 extents ~ 14 GB Storage */ | ||
1186 | /* Allows up to about 3.8TB */ | ||
1187 | #define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE) | ||
1188 | |||
1189 | /* Since the smallest IO unit is usually 512 bytes */ | ||
1190 | #define MD_SECTOR_SHIFT 9 | ||
1191 | #define MD_SECTOR_SIZE (1<<MD_SECTOR_SHIFT) | ||
1192 | |||
1193 | /* activity log */ | ||
1194 | #define AL_EXTENTS_PT ((MD_SECTOR_SIZE-12)/8-1) /* 61 ; Extents per 512B sector */ | ||
1195 | #define AL_EXTENT_SHIFT 22 /* One extent represents 4M Storage */ | ||
1196 | #define AL_EXTENT_SIZE (1<<AL_EXTENT_SHIFT) | ||
1197 | |||
1198 | #if BITS_PER_LONG == 32 | ||
1199 | #define LN2_BPL 5 | ||
1200 | #define cpu_to_lel(A) cpu_to_le32(A) | ||
1201 | #define lel_to_cpu(A) le32_to_cpu(A) | ||
1202 | #elif BITS_PER_LONG == 64 | ||
1203 | #define LN2_BPL 6 | ||
1204 | #define cpu_to_lel(A) cpu_to_le64(A) | ||
1205 | #define lel_to_cpu(A) le64_to_cpu(A) | ||
1206 | #else | ||
1207 | #error "LN2 of BITS_PER_LONG unknown!" | ||
1208 | #endif | ||
1209 | |||
1210 | /* resync bitmap */ | ||
1211 | /* 16MB sized 'bitmap extent' to track syncer usage */ | ||
1212 | struct bm_extent { | ||
1213 | int rs_left; /* number of bits set (out of sync) in this extent. */ | ||
1214 | int rs_failed; /* number of failed resync requests in this extent. */ | ||
1215 | unsigned long flags; | ||
1216 | struct lc_element lce; | ||
1217 | }; | ||
1218 | |||
1219 | #define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */ | ||
1220 | #define BME_LOCKED 1 /* bm_extent.flags: syncer active on this one. */ | ||
1221 | |||
1222 | /* drbd_bitmap.c */ | ||
1223 | /* | ||
1224 | * We need to store one bit for a block. | ||
1225 | * Example: 1GB disk @ 4096 byte blocks ==> we need 32 KB bitmap. | ||
1226 | * Bit 0 ==> local node thinks this block is binary identical on both nodes | ||
1227 | * Bit 1 ==> local node thinks this block needs to be synced. | ||
1228 | */ | ||
1229 | |||
1230 | #define BM_BLOCK_SHIFT 12 /* 4k per bit */ | ||
1231 | #define BM_BLOCK_SIZE (1<<BM_BLOCK_SHIFT) | ||
1232 | /* (9+3) : 512 bytes @ 8 bits; representing 16M storage | ||
1233 | * per sector of on disk bitmap */ | ||
1234 | #define BM_EXT_SHIFT (BM_BLOCK_SHIFT + MD_SECTOR_SHIFT + 3) /* = 24 */ | ||
1235 | #define BM_EXT_SIZE (1<<BM_EXT_SHIFT) | ||
1236 | |||
1237 | #if (BM_EXT_SHIFT != 24) || (BM_BLOCK_SHIFT != 12) | ||
1238 | #error "HAVE YOU FIXED drbdmeta AS WELL??" | ||
1239 | #endif | ||
1240 | |||
1241 | /* thus many _storage_ sectors are described by one bit */ | ||
1242 | #define BM_SECT_TO_BIT(x) ((x)>>(BM_BLOCK_SHIFT-9)) | ||
1243 | #define BM_BIT_TO_SECT(x) ((sector_t)(x)<<(BM_BLOCK_SHIFT-9)) | ||
1244 | #define BM_SECT_PER_BIT BM_BIT_TO_SECT(1) | ||
1245 | |||
1246 | /* bit to represented kilo byte conversion */ | ||
1247 | #define Bit2KB(bits) ((bits)<<(BM_BLOCK_SHIFT-10)) | ||
1248 | |||
1249 | /* in which _bitmap_ extent (resp. sector) the bit for a certain | ||
1250 | * _storage_ sector is located */ | ||
1251 | #define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9)) | ||
1252 | |||
1253 | /* how many _storage_ sectors we have per bitmap sector */ | ||
1254 | #define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9)) | ||
1255 | #define BM_SECT_PER_EXT BM_EXT_TO_SECT(1) | ||
1256 | |||
1257 | /* in one sector of the bitmap, we have this many activity_log extents. */ | ||
1258 | #define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT)) | ||
1259 | #define BM_WORDS_PER_AL_EXT (1 << (AL_EXTENT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) | ||
1260 | |||
1261 | #define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT) | ||
1262 | #define BM_BLOCKS_PER_BM_EXT_MASK ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1) | ||
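A worked example with illustrative numbers, to make the conversions concrete:

/* For a 1 GiB device (2097152 sectors of 512 B):
 *   BM_SECT_TO_BIT(2097152) = 2097152 >> 3  = 262144 bits (one per 4 KiB block)
 *   262144 bits / 8                         = 32 KiB of bitmap, as stated above
 *   BM_SECT_TO_EXT(2097152) = 2097152 >> 15 = 64 bitmap extents of 16 MiB each
 *   Bit2KB(262144)          = 262144 << 2   = 1048576 KiB, the storage covered */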
1263 | |||
1264 | /* the extent in "PER_EXTENT" below is an activity log extent | ||
1265 | * we need that many (long words/bytes) to store the bitmap | ||
1266 | * of one AL_EXTENT_SIZE chunk of storage. | ||
1267 | * we can store the bitmap for that many AL_EXTENTS within | ||
1268 | * one sector of the _on_disk_ bitmap: | ||
1269 | * bit 0 bit 37 bit 38 bit (512*8)-1 | ||
1270 | * ...|........|........|.. // ..|........| | ||
1271 | * sect. 0 `296 `304 ^(512*8*8)-1 | ||
1272 | * | ||
1273 | #define BM_WORDS_PER_EXT ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / BITS_PER_LONG ) | ||
1274 | #define BM_BYTES_PER_EXT ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / 8 ) // 128 | ||
1275 | #define BM_EXT_PER_SECT ( 512 / BM_BYTES_PER_EXTENT ) // 4 | ||
1276 | */ | ||
1277 | |||
1278 | #define DRBD_MAX_SECTORS_32 (0xffffffffLU) | ||
1279 | #define DRBD_MAX_SECTORS_BM \ | ||
1280 | ((MD_RESERVED_SECT - MD_BM_OFFSET) * (1LL<<(BM_EXT_SHIFT-9))) | ||
1281 | #if DRBD_MAX_SECTORS_BM < DRBD_MAX_SECTORS_32 | ||
1282 | #define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_BM | ||
1283 | #define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_BM | ||
1284 | #elif !defined(CONFIG_LBD) && BITS_PER_LONG == 32 | ||
1285 | #define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_32 | ||
1286 | #define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_32 | ||
1287 | #else | ||
1288 | #define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_BM | ||
1289 | /* 16 TB in units of sectors */ | ||
1290 | #if BITS_PER_LONG == 32 | ||
1291 | /* adjust by one page worth of bitmap, | ||
1292 | * so we won't wrap around in drbd_bm_find_next_bit. | ||
1293 | * you should use a 64bit OS for that much storage anyway. */ | ||
1294 | #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff) | ||
1295 | #else | ||
1296 | #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0x1LU << 32) | ||
1297 | #endif | ||
1298 | #endif | ||
1299 | |||
1300 | /* Sector shift value for the "hash" functions of tl_hash and ee_hash tables. | ||
1301 | * With a value of 6, all IO in one 32K block makes it to the same slot of the | ||
1302 | * hash table. */ | ||
1303 | #define HT_SHIFT 6 | ||
1304 | #define DRBD_MAX_SEGMENT_SIZE (1U<<(9+HT_SHIFT)) | ||
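A hedged sketch of a slot computation that matches the comment above; the driver's real tl_hash/ee_hash functions live elsewhere in the driver, so this only illustrates the effect of HT_SHIFT:

static inline unsigned int example_hash_slot(sector_t sector, unsigned int table_size)
{
	/* all sectors inside the same 1U << (9 + HT_SHIFT) = 32 KiB region
	 * shift down to the same value, and therefore land in the same slot */
	return (unsigned int)(sector >> HT_SHIFT) % table_size;
}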
1305 | |||
1306 | /* Number of elements in the app_reads_hash */ | ||
1307 | #define APP_R_HSIZE 15 | ||
1308 | |||
1309 | extern int drbd_bm_init(struct drbd_conf *mdev); | ||
1310 | extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors); | ||
1311 | extern void drbd_bm_cleanup(struct drbd_conf *mdev); | ||
1312 | extern void drbd_bm_set_all(struct drbd_conf *mdev); | ||
1313 | extern void drbd_bm_clear_all(struct drbd_conf *mdev); | ||
1314 | extern int drbd_bm_set_bits( | ||
1315 | struct drbd_conf *mdev, unsigned long s, unsigned long e); | ||
1316 | extern int drbd_bm_clear_bits( | ||
1317 | struct drbd_conf *mdev, unsigned long s, unsigned long e); | ||
1318 | /* bm_set_bits variant for use while holding drbd_bm_lock */ | ||
1319 | extern void _drbd_bm_set_bits(struct drbd_conf *mdev, | ||
1320 | const unsigned long s, const unsigned long e); | ||
1321 | extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr); | ||
1322 | extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); | ||
1323 | extern int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local); | ||
1324 | extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); | ||
1325 | extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); | ||
1326 | extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, | ||
1327 | unsigned long al_enr); | ||
1328 | extern size_t drbd_bm_words(struct drbd_conf *mdev); | ||
1329 | extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); | ||
1330 | extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); | ||
1331 | extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); | ||
1332 | /* bm_find_next variants for use while you hold drbd_bm_lock() */ | ||
1333 | extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); | ||
1334 | extern unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo); | ||
1335 | extern unsigned long drbd_bm_total_weight(struct drbd_conf *mdev); | ||
1336 | extern int drbd_bm_rs_done(struct drbd_conf *mdev); | ||
1337 | /* for receive_bitmap */ | ||
1338 | extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, | ||
1339 | size_t number, unsigned long *buffer); | ||
1340 | /* for _drbd_send_bitmap and drbd_bm_write_sect */ | ||
1341 | extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, | ||
1342 | size_t number, unsigned long *buffer); | ||
1343 | |||
1344 | extern void drbd_bm_lock(struct drbd_conf *mdev, char *why); | ||
1345 | extern void drbd_bm_unlock(struct drbd_conf *mdev); | ||
1346 | |||
1347 | extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e); | ||
1348 | /* drbd_main.c */ | ||
1349 | |||
1350 | extern struct kmem_cache *drbd_request_cache; | ||
1351 | extern struct kmem_cache *drbd_ee_cache; /* epoch entries */ | ||
1352 | extern struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ | ||
1353 | extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ | ||
1354 | extern mempool_t *drbd_request_mempool; | ||
1355 | extern mempool_t *drbd_ee_mempool; | ||
1356 | |||
1357 | extern struct page *drbd_pp_pool; /* drbd's page pool */ | ||
1358 | extern spinlock_t drbd_pp_lock; | ||
1359 | extern int drbd_pp_vacant; | ||
1360 | extern wait_queue_head_t drbd_pp_wait; | ||
1361 | |||
1362 | extern rwlock_t global_state_lock; | ||
1363 | |||
1364 | extern struct drbd_conf *drbd_new_device(unsigned int minor); | ||
1365 | extern void drbd_free_mdev(struct drbd_conf *mdev); | ||
1366 | |||
1367 | extern int proc_details; | ||
1368 | |||
1369 | /* drbd_req */ | ||
1370 | extern int drbd_make_request_26(struct request_queue *q, struct bio *bio); | ||
1371 | extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); | ||
1372 | extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec); | ||
1373 | extern int is_valid_ar_handle(struct drbd_request *, sector_t); | ||
1374 | |||
1375 | |||
1376 | /* drbd_nl.c */ | ||
1377 | extern void drbd_suspend_io(struct drbd_conf *mdev); | ||
1378 | extern void drbd_resume_io(struct drbd_conf *mdev); | ||
1379 | extern char *ppsize(char *buf, unsigned long long size); | ||
1380 | extern sector_t drbd_new_dev_size(struct drbd_conf *, | ||
1381 | struct drbd_backing_dev *); | ||
1382 | enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; | ||
1383 | extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *) __must_hold(local); | ||
1384 | extern void resync_after_online_grow(struct drbd_conf *); | ||
1385 | extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); | ||
1386 | extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, | ||
1387 | int force); | ||
1388 | enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev); | ||
1389 | extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); | ||
1390 | |||
1391 | /* drbd_worker.c */ | ||
1392 | extern int drbd_worker(struct drbd_thread *thi); | ||
1393 | extern int drbd_alter_sa(struct drbd_conf *mdev, int na); | ||
1394 | extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side); | ||
1395 | extern void resume_next_sg(struct drbd_conf *mdev); | ||
1396 | extern void suspend_other_sg(struct drbd_conf *mdev); | ||
1397 | extern int drbd_resync_finished(struct drbd_conf *mdev); | ||
1398 | /* maybe rather drbd_main.c ? */ | ||
1399 | extern int drbd_md_sync_page_io(struct drbd_conf *mdev, | ||
1400 | struct drbd_backing_dev *bdev, sector_t sector, int rw); | ||
1401 | extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); | ||
1402 | |||
1403 | static inline void ov_oos_print(struct drbd_conf *mdev) | ||
1404 | { | ||
1405 | if (mdev->ov_last_oos_size) { | ||
1406 | dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n", | ||
1407 | (unsigned long long)mdev->ov_last_oos_start, | ||
1408 | (unsigned long)mdev->ov_last_oos_size); | ||
1409 | } | ||
1410 | mdev->ov_last_oos_size = 0; | ||
1411 | } | ||
1412 | |||
1413 | |||
1414 | extern void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); | ||
1415 | /* worker callbacks */ | ||
1416 | extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int); | ||
1417 | extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int); | ||
1418 | extern int w_e_end_data_req(struct drbd_conf *, struct drbd_work *, int); | ||
1419 | extern int w_e_end_rsdata_req(struct drbd_conf *, struct drbd_work *, int); | ||
1420 | extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int); | ||
1421 | extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int); | ||
1422 | extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); | ||
1423 | extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); | ||
1424 | extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int); | ||
1425 | extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); | ||
1426 | extern int w_io_error(struct drbd_conf *, struct drbd_work *, int); | ||
1427 | extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); | ||
1428 | extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int); | ||
1429 | extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); | ||
1430 | extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int); | ||
1431 | extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int); | ||
1432 | extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int); | ||
1433 | extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); | ||
1434 | |||
1435 | extern void resync_timer_fn(unsigned long data); | ||
1436 | |||
1437 | /* drbd_receiver.c */ | ||
1438 | extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); | ||
1439 | extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, | ||
1440 | u64 id, | ||
1441 | sector_t sector, | ||
1442 | unsigned int data_size, | ||
1443 | gfp_t gfp_mask) __must_hold(local); | ||
1444 | extern void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e); | ||
1445 | extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev, | ||
1446 | struct list_head *head); | ||
1447 | extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, | ||
1448 | struct list_head *head); | ||
1449 | extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); | ||
1450 | extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); | ||
1451 | extern void drbd_flush_workqueue(struct drbd_conf *mdev); | ||
1452 | |||
1453 | /* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to | ||
1454 | * mess with get_fs/set_fs, we know we are KERNEL_DS always. */ | ||
1455 | static inline int drbd_setsockopt(struct socket *sock, int level, int optname, | ||
1456 | char __user *optval, int optlen) | ||
1457 | { | ||
1458 | int err; | ||
1459 | if (level == SOL_SOCKET) | ||
1460 | err = sock_setsockopt(sock, level, optname, optval, optlen); | ||
1461 | else | ||
1462 | err = sock->ops->setsockopt(sock, level, optname, optval, | ||
1463 | optlen); | ||
1464 | return err; | ||
1465 | } | ||
1466 | |||
1467 | static inline void drbd_tcp_cork(struct socket *sock) | ||
1468 | { | ||
1469 | int __user val = 1; | ||
1470 | (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, | ||
1471 | (char __user *)&val, sizeof(val)); | ||
1472 | } | ||
1473 | |||
1474 | static inline void drbd_tcp_uncork(struct socket *sock) | ||
1475 | { | ||
1476 | int __user val = 0; | ||
1477 | (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, | ||
1478 | (char __user *)&val, sizeof(val)); | ||
1479 | } | ||
1480 | |||
1481 | static inline void drbd_tcp_nodelay(struct socket *sock) | ||
1482 | { | ||
1483 | int __user val = 1; | ||
1484 | (void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY, | ||
1485 | (char __user *)&val, sizeof(val)); | ||
1486 | } | ||
1487 | |||
1488 | static inline void drbd_tcp_quickack(struct socket *sock) | ||
1489 | { | ||
1490 | int __user val = 1; | ||
1491 | (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, | ||
1492 | (char __user *)&val, sizeof(val)); | ||
1493 | } | ||
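/*
 * Usage sketch (hypothetical helper, not part of the driver): the cork
 * helpers above are meant to bracket a burst of small sends so TCP can
 * coalesce them into fewer segments on the wire.
 */
static inline void drbd_example_corked_send(struct drbd_conf *mdev)
{
	drbd_tcp_cork(mdev->data.socket);	/* hold back partial frames */
	/* ... send several small packets on the data socket here ... */
	drbd_tcp_uncork(mdev->data.socket);	/* let TCP flush what was queued */
}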
1494 | |||
1495 | void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo); | ||
1496 | |||
1497 | /* drbd_proc.c */ | ||
1498 | extern struct proc_dir_entry *drbd_proc; | ||
1499 | extern struct file_operations drbd_proc_fops; | ||
1500 | extern const char *drbd_conn_str(enum drbd_conns s); | ||
1501 | extern const char *drbd_role_str(enum drbd_role s); | ||
1502 | |||
1503 | /* drbd_actlog.c */ | ||
1504 | extern void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector); | ||
1505 | extern void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector); | ||
1506 | extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector); | ||
1507 | extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector); | ||
1508 | extern int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector); | ||
1509 | extern void drbd_rs_cancel_all(struct drbd_conf *mdev); | ||
1510 | extern int drbd_rs_del_all(struct drbd_conf *mdev); | ||
1511 | extern void drbd_rs_failed_io(struct drbd_conf *mdev, | ||
1512 | sector_t sector, int size); | ||
1513 | extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *); | ||
1514 | extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, | ||
1515 | int size, const char *file, const unsigned int line); | ||
1516 | #define drbd_set_in_sync(mdev, sector, size) \ | ||
1517 | __drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__) | ||
1518 | extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, | ||
1519 | int size, const char *file, const unsigned int line); | ||
1520 | #define drbd_set_out_of_sync(mdev, sector, size) \ | ||
1521 | __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) | ||
1522 | extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); | ||
1523 | extern void drbd_al_to_on_disk_bm(struct drbd_conf *mdev); | ||
1524 | extern void drbd_al_shrink(struct drbd_conf *mdev); | ||
1525 | |||
1526 | |||
1527 | /* drbd_nl.c */ | ||
1528 | |||
1529 | void drbd_nl_cleanup(void); | ||
1530 | int __init drbd_nl_init(void); | ||
1531 | void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state); | ||
1532 | void drbd_bcast_sync_progress(struct drbd_conf *mdev); | ||
1533 | void drbd_bcast_ee(struct drbd_conf *mdev, | ||
1534 | const char *reason, const int dgs, | ||
1535 | const char* seen_hash, const char* calc_hash, | ||
1536 | const struct drbd_epoch_entry* e); | ||
1537 | |||
1538 | |||
1539 | /** | ||
1540 | * DOC: DRBD State macros | ||
1541 | * | ||
1542 | * These macros are used to express state changes in easily readable form. | ||
1543 | * | ||
1544 | * The NS macros expand to a mask and a value that can be bit-or'ed onto the | ||
1545 | * current state as soon as the spinlock (req_lock) has been taken. | ||
1546 | * | ||
1547 | * The _NS macros are used for state functions that get called with the | ||
1548 | * spinlock held. These macros expand directly to the new state value. | ||
1549 | * | ||
1550 | * Besides the basic forms NS() and _NS(), additional _?NS[23] variants are | ||
1551 | * defined to express state changes that affect more than one aspect of the state. | ||
1552 | * | ||
1553 | * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY) | ||
1554 | * means that the network connection was established and that the peer | ||
1555 | * is in the secondary role. | ||
1556 | */ | ||
1557 | #define role_MASK R_MASK | ||
1558 | #define peer_MASK R_MASK | ||
1559 | #define disk_MASK D_MASK | ||
1560 | #define pdsk_MASK D_MASK | ||
1561 | #define conn_MASK C_MASK | ||
1562 | #define susp_MASK 1 | ||
1563 | #define user_isp_MASK 1 | ||
1564 | #define aftr_isp_MASK 1 | ||
1565 | |||
1566 | #define NS(T, S) \ | ||
1567 | ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \ | ||
1568 | ({ union drbd_state val; val.i = 0; val.T = (S); val; }) | ||
1569 | #define NS2(T1, S1, T2, S2) \ | ||
1570 | ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ | ||
1571 | mask.T2 = T2##_MASK; mask; }), \ | ||
1572 | ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ | ||
1573 | val.T2 = (S2); val; }) | ||
1574 | #define NS3(T1, S1, T2, S2, T3, S3) \ | ||
1575 | ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ | ||
1576 | mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \ | ||
1577 | ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ | ||
1578 | val.T2 = (S2); val.T3 = (S3); val; }) | ||
1579 | |||
1580 | #define _NS(D, T, S) \ | ||
1581 | D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; }) | ||
1582 | #define _NS2(D, T1, S1, T2, S2) \ | ||
1583 | D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ | ||
1584 | __ns.T2 = (S2); __ns; }) | ||
1585 | #define _NS3(D, T1, S1, T2, S2, T3, S3) \ | ||
1586 | D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ | ||
1587 | __ns.T2 = (S2); __ns.T3 = (S3); __ns; }) | ||
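/*
 * Illustration of the expansion: NS2(conn, C_CONNECTED, peer, R_SECONDARY)
 * yields two union drbd_state expressions, first a mask with
 * mask.conn = conn_MASK and mask.peer = peer_MASK, then a value with
 * val.conn = C_CONNECTED and val.peer = R_SECONDARY, which is exactly the
 * (mask, val) pair the state change functions below expect.
 */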
1588 | |||
1589 | /* | ||
1590 | * inline helper functions | ||
1591 | *************************/ | ||
1592 | |||
1593 | static inline void drbd_state_lock(struct drbd_conf *mdev) | ||
1594 | { | ||
1595 | wait_event(mdev->misc_wait, | ||
1596 | !test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags)); | ||
1597 | } | ||
1598 | |||
1599 | static inline void drbd_state_unlock(struct drbd_conf *mdev) | ||
1600 | { | ||
1601 | clear_bit(CLUSTER_ST_CHANGE, &mdev->flags); | ||
1602 | wake_up(&mdev->misc_wait); | ||
1603 | } | ||
1604 | |||
1605 | static inline int _drbd_set_state(struct drbd_conf *mdev, | ||
1606 | union drbd_state ns, enum chg_state_flags flags, | ||
1607 | struct completion *done) | ||
1608 | { | ||
1609 | int rv; | ||
1610 | |||
1611 | read_lock(&global_state_lock); | ||
1612 | rv = __drbd_set_state(mdev, ns, flags, done); | ||
1613 | read_unlock(&global_state_lock); | ||
1614 | |||
1615 | return rv; | ||
1616 | } | ||
1617 | |||
1618 | /** | ||
1619 | * drbd_request_state() - Request a state change | ||
1620 | * @mdev: DRBD device. | ||
1621 | * @mask: mask of state bits to change. | ||
1622 | * @val: value of new state bits. | ||
1623 | * | ||
1624 | * This is the most graceful way of requesting a state change. It is | ||
1625 | * quite verbose in case the state change is not possible, and all those | ||
1626 | * state changes are globally serialized. | ||
1627 | */ | ||
1628 | static inline int drbd_request_state(struct drbd_conf *mdev, | ||
1629 | union drbd_state mask, | ||
1630 | union drbd_state val) | ||
1631 | { | ||
1632 | return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); | ||
1633 | } | ||
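/* Usage sketch (hypothetical call site): combined with the NS() helpers
 * above, a typical request reads
 *	rv = drbd_request_state(mdev, NS(disk, D_OUTDATED));
 * asking to change only the disk aspect of the state, with verbose error
 * reporting and global ordering as selected above. */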
1634 | |||
1635 | #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) | ||
1636 | static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where) | ||
1637 | { | ||
1638 | switch (mdev->ldev->dc.on_io_error) { | ||
1639 | case EP_PASS_ON: | ||
1640 | if (!forcedetach) { | ||
1641 | if (printk_ratelimit()) | ||
1642 | dev_err(DEV, "Local IO failed in %s. " | ||
1643 | "Passing error on...\n", where); | ||
1644 | break; | ||
1645 | } | ||
1646 | /* NOTE fall through to detach case if forcedetach set */ | ||
1647 | case EP_DETACH: | ||
1648 | case EP_CALL_HELPER: | ||
1649 | if (mdev->state.disk > D_FAILED) { | ||
1650 | _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); | ||
1651 | dev_err(DEV, "Local IO failed in %s. " | ||
1652 | "Detaching...\n", where); | ||
1653 | } | ||
1654 | break; | ||
1655 | } | ||
1656 | } | ||
1657 | |||
1658 | /** | ||
1659 | * drbd_chk_io_error() - Handle the on_io_error setting, should be called from all io completion handlers | ||
1660 | * @mdev: DRBD device. | ||
1661 | * @error: Error code passed to the IO completion callback | ||
1662 | * @forcedetach: Force detach. I.e. the error happened while accessing the meta data | ||
1663 | * | ||
1664 | * See also drbd_main.c:after_state_ch() if (os.disk > D_FAILED && ns.disk == D_FAILED) | ||
1665 | */ | ||
1666 | #define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__) | ||
1667 | static inline void drbd_chk_io_error_(struct drbd_conf *mdev, | ||
1668 | int error, int forcedetach, const char *where) | ||
1669 | { | ||
1670 | if (error) { | ||
1671 | unsigned long flags; | ||
1672 | spin_lock_irqsave(&mdev->req_lock, flags); | ||
1673 | __drbd_chk_io_error_(mdev, forcedetach, where); | ||
1674 | spin_unlock_irqrestore(&mdev->req_lock, flags); | ||
1675 | } | ||
1676 | } | ||
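/* Usage sketch (hypothetical completion handler, not part of the driver):
 * IO completion paths are expected to funnel their error code through
 * drbd_chk_io_error() so the configured on_io_error policy is applied
 * exactly once, under req_lock. */
static inline void drbd_example_endio(struct drbd_conf *mdev, int error)
{
	/* forcedetach = FALSE: an ordinary data-disk error,
	 * not a meta data access failure */
	drbd_chk_io_error(mdev, error, FALSE);
}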
1677 | |||
1678 | |||
1679 | /** | ||
1680 | * drbd_md_first_sector() - Returns the first sector number of the meta data area | ||
1681 | * @bdev: Meta data block device. | ||
1682 | * | ||
1683 | * BTW, for internal meta data, this happens to be the maximum capacity | ||
1684 | * we could agree upon with our peer node. | ||
1685 | */ | ||
1686 | static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev) | ||
1687 | { | ||
1688 | switch (bdev->dc.meta_dev_idx) { | ||
1689 | case DRBD_MD_INDEX_INTERNAL: | ||
1690 | case DRBD_MD_INDEX_FLEX_INT: | ||
1691 | return bdev->md.md_offset + bdev->md.bm_offset; | ||
1692 | case DRBD_MD_INDEX_FLEX_EXT: | ||
1693 | default: | ||
1694 | return bdev->md.md_offset; | ||
1695 | } | ||
1696 | } | ||
1697 | |||
1698 | /** | ||
1699 | * drbd_md_last_sector() - Return the last sector number of the meta data area | ||
1700 | * @bdev: Meta data block device. | ||
1701 | */ | ||
1702 | static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev) | ||
1703 | { | ||
1704 | switch (bdev->dc.meta_dev_idx) { | ||
1705 | case DRBD_MD_INDEX_INTERNAL: | ||
1706 | case DRBD_MD_INDEX_FLEX_INT: | ||
1707 | return bdev->md.md_offset + MD_AL_OFFSET - 1; | ||
1708 | case DRBD_MD_INDEX_FLEX_EXT: | ||
1709 | default: | ||
1710 | return bdev->md.md_offset + bdev->md.md_size_sect; | ||
1711 | } | ||
1712 | } | ||
1713 | |||
1714 | /* Returns the number of 512 byte sectors of the device */ | ||
1715 | static inline sector_t drbd_get_capacity(struct block_device *bdev) | ||
1716 | { | ||
1717 | /* return bdev ? get_capacity(bdev->bd_disk) : 0; */ | ||
1718 | return bdev ? bdev->bd_inode->i_size >> 9 : 0; | ||
1719 | } | ||
1720 | |||
1721 | /** | ||
1722 | * drbd_get_max_capacity() - Returns the capacity we announce to our peer | ||
1723 | * @bdev: Meta data block device. | ||
1724 | * | ||
1725 | * returns the capacity we announce to our peer. we clip ourselves at the | ||
1726 | * various MAX_SECTORS, because if we don't, the current implementation will | ||
1727 | * oops sooner or later | ||
1728 | */ | ||
1729 | static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev) | ||
1730 | { | ||
1731 | sector_t s; | ||
1732 | switch (bdev->dc.meta_dev_idx) { | ||
1733 | case DRBD_MD_INDEX_INTERNAL: | ||
1734 | case DRBD_MD_INDEX_FLEX_INT: | ||
1735 | s = drbd_get_capacity(bdev->backing_bdev) | ||
1736 | ? min_t(sector_t, DRBD_MAX_SECTORS_FLEX, | ||
1737 | drbd_md_first_sector(bdev)) | ||
1738 | : 0; | ||
1739 | break; | ||
1740 | case DRBD_MD_INDEX_FLEX_EXT: | ||
1741 | s = min_t(sector_t, DRBD_MAX_SECTORS_FLEX, | ||
1742 | drbd_get_capacity(bdev->backing_bdev)); | ||
1743 | /* clip at maximum size the meta device can support */ | ||
1744 | s = min_t(sector_t, s, | ||
1745 | BM_EXT_TO_SECT(bdev->md.md_size_sect | ||
1746 | - bdev->md.bm_offset)); | ||
1747 | break; | ||
1748 | default: | ||
1749 | s = min_t(sector_t, DRBD_MAX_SECTORS, | ||
1750 | drbd_get_capacity(bdev->backing_bdev)); | ||
1751 | } | ||
1752 | return s; | ||
1753 | } | ||
1754 | |||
1755 | /** | ||
1756 | * drbd_md_ss__() - Return the sector number of our meta data super block | ||
1757 | * @mdev: DRBD device. | ||
1758 | * @bdev: Meta data block device. | ||
1759 | */ | ||
1760 | static inline sector_t drbd_md_ss__(struct drbd_conf *mdev, | ||
1761 | struct drbd_backing_dev *bdev) | ||
1762 | { | ||
1763 | switch (bdev->dc.meta_dev_idx) { | ||
1764 | default: /* external, some index */ | ||
1765 | return MD_RESERVED_SECT * bdev->dc.meta_dev_idx; | ||
1766 | case DRBD_MD_INDEX_INTERNAL: | ||
1767 | /* with drbd08, internal meta data is always "flexible" */ | ||
1768 | case DRBD_MD_INDEX_FLEX_INT: | ||
1769 | /* sizeof(struct md_on_disk_07) == 4k | ||
1770 | * position: last 4k aligned block of 4k size */ | ||
1771 | if (!bdev->backing_bdev) { | ||
1772 | if (__ratelimit(&drbd_ratelimit_state)) { | ||
1773 | dev_err(DEV, "bdev->backing_bdev==NULL\n"); | ||
1774 | dump_stack(); | ||
1775 | } | ||
1776 | return 0; | ||
1777 | } | ||
1778 | return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL) | ||
1779 | - MD_AL_OFFSET; | ||
1780 | case DRBD_MD_INDEX_FLEX_EXT: | ||
1781 | return 0; | ||
1782 | } | ||
1783 | } | ||
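/*
 * Worked example for the internal/flex-internal case above, assuming
 * MD_AL_OFFSET is 8 sectors (one 4k block) as defined earlier in this
 * header: on a backing device of 1000005 sectors, "& ~7ULL" rounds down to
 * 1000000, and subtracting MD_AL_OFFSET places the super block at sector
 * 999992, i.e. in the last 4k aligned block of 4k size.
 */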
1784 | |||
1785 | static inline void | ||
1786 | _drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) | ||
1787 | { | ||
1788 | list_add_tail(&w->list, &q->q); | ||
1789 | up(&q->s); | ||
1790 | } | ||
1791 | |||
1792 | static inline void | ||
1793 | drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w) | ||
1794 | { | ||
1795 | unsigned long flags; | ||
1796 | spin_lock_irqsave(&q->q_lock, flags); | ||
1797 | list_add(&w->list, &q->q); | ||
1798 | up(&q->s); /* within the spinlock, | ||
1799 | see comment near end of drbd_worker() */ | ||
1800 | spin_unlock_irqrestore(&q->q_lock, flags); | ||
1801 | } | ||
1802 | |||
1803 | static inline void | ||
1804 | drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) | ||
1805 | { | ||
1806 | unsigned long flags; | ||
1807 | spin_lock_irqsave(&q->q_lock, flags); | ||
1808 | list_add_tail(&w->list, &q->q); | ||
1809 | up(&q->s); /* within the spinlock, | ||
1810 | see comment near end of drbd_worker() */ | ||
1811 | spin_unlock_irqrestore(&q->q_lock, flags); | ||
1812 | } | ||
1813 | |||
1814 | static inline void wake_asender(struct drbd_conf *mdev) | ||
1815 | { | ||
1816 | if (test_bit(SIGNAL_ASENDER, &mdev->flags)) | ||
1817 | force_sig(DRBD_SIG, mdev->asender.task); | ||
1818 | } | ||
1819 | |||
1820 | static inline void request_ping(struct drbd_conf *mdev) | ||
1821 | { | ||
1822 | set_bit(SEND_PING, &mdev->flags); | ||
1823 | wake_asender(mdev); | ||
1824 | } | ||
1825 | |||
1826 | static inline int drbd_send_short_cmd(struct drbd_conf *mdev, | ||
1827 | enum drbd_packets cmd) | ||
1828 | { | ||
1829 | struct p_header h; | ||
1830 | return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); | ||
1831 | } | ||
1832 | |||
1833 | static inline int drbd_send_ping(struct drbd_conf *mdev) | ||
1834 | { | ||
1835 | struct p_header h; | ||
1836 | return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h)); | ||
1837 | } | ||
1838 | |||
1839 | static inline int drbd_send_ping_ack(struct drbd_conf *mdev) | ||
1840 | { | ||
1841 | struct p_header h; | ||
1842 | return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); | ||
1843 | } | ||
1844 | |||
1845 | static inline void drbd_thread_stop(struct drbd_thread *thi) | ||
1846 | { | ||
1847 | _drbd_thread_stop(thi, FALSE, TRUE); | ||
1848 | } | ||
1849 | |||
1850 | static inline void drbd_thread_stop_nowait(struct drbd_thread *thi) | ||
1851 | { | ||
1852 | _drbd_thread_stop(thi, FALSE, FALSE); | ||
1853 | } | ||
1854 | |||
1855 | static inline void drbd_thread_restart_nowait(struct drbd_thread *thi) | ||
1856 | { | ||
1857 | _drbd_thread_stop(thi, TRUE, FALSE); | ||
1858 | } | ||
1859 | |||
1860 | /* counts how many answer packets we expect from our peer, | ||
1861 | * for either explicit application requests, | ||
1862 | * or implicit barrier packets as necessary. | ||
1863 | * increased: | ||
1864 | * w_send_barrier | ||
1865 | * _req_mod(req, queue_for_net_write or queue_for_net_read); | ||
1866 | * it is much easier and equally valid to count what we queue for the | ||
1867 | * worker, even before it actually was queued or sent. | ||
1868 | * (drbd_make_request_common; recovery path on read io-error) | ||
1869 | * decreased: | ||
1870 | * got_BarrierAck (respective tl_clear, tl_clear_barrier) | ||
1871 | * _req_mod(req, data_received) | ||
1872 | * [from receive_DataReply] | ||
1873 | * _req_mod(req, write_acked_by_peer or recv_acked_by_peer or neg_acked) | ||
1874 | * [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)] | ||
1875 | * for some reason it is NOT decreased in got_NegAck, | ||
1876 | * but in the resulting cleanup code from report_params. | ||
1877 | * we should try to remember the reason for that... | ||
1878 | * _req_mod(req, send_failed or send_canceled) | ||
1879 | * _req_mod(req, connection_lost_while_pending) | ||
1880 | * [from tl_clear_barrier] | ||
1881 | */ | ||
1882 | static inline void inc_ap_pending(struct drbd_conf *mdev) | ||
1883 | { | ||
1884 | atomic_inc(&mdev->ap_pending_cnt); | ||
1885 | } | ||
1886 | |||
1887 | #define ERR_IF_CNT_IS_NEGATIVE(which) \ | ||
1888 | if (atomic_read(&mdev->which) < 0) \ | ||
1889 | dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n", \ | ||
1890 | __func__ , __LINE__ , \ | ||
1891 | atomic_read(&mdev->which)) | ||
1892 | |||
1893 | #define dec_ap_pending(mdev) do { \ | ||
1894 | typecheck(struct drbd_conf *, mdev); \ | ||
1895 | if (atomic_dec_and_test(&mdev->ap_pending_cnt)) \ | ||
1896 | wake_up(&mdev->misc_wait); \ | ||
1897 | ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt); } while (0) | ||
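/*
 * Condensed sketch of the pairing listed above (hypothetical, names taken
 * from the comment): the request code does roughly
 *	inc_ap_pending(mdev);	on queue_for_net_write / queue_for_net_read
 *	...
 *	dec_ap_pending(mdev);	on write_acked_by_peer, data_received,
 *				send_failed, connection_lost_while_pending
 * so waiting for ap_pending_cnt to drain via misc_wait stays well defined.
 */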
1898 | |||
1899 | /* counts how many resync-related answers we still expect from the peer | ||
1900 | * increase decrease | ||
1901 | * C_SYNC_TARGET sends P_RS_DATA_REQUEST (and expects P_RS_DATA_REPLY) | ||
1902 | * C_SYNC_SOURCE sends P_RS_DATA_REPLY (and expects P_WRITE_ACK with ID_SYNCER) | ||
1903 | * (or P_NEG_ACK with ID_SYNCER) | ||
1904 | */ | ||
1905 | static inline void inc_rs_pending(struct drbd_conf *mdev) | ||
1906 | { | ||
1907 | atomic_inc(&mdev->rs_pending_cnt); | ||
1908 | } | ||
1909 | |||
1910 | #define dec_rs_pending(mdev) do { \ | ||
1911 | typecheck(struct drbd_conf *, mdev); \ | ||
1912 | atomic_dec(&mdev->rs_pending_cnt); \ | ||
1913 | ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt); } while (0) | ||
1914 | |||
1915 | /* counts how many answers we still need to send to the peer. | ||
1916 | * increased on | ||
1917 | * receive_Data unless protocol A; | ||
1918 | * we need to send a P_RECV_ACK (proto B) | ||
1919 | * or P_WRITE_ACK (proto C) | ||
1920 | * receive_RSDataReply (recv_resync_read) we need to send a P_WRITE_ACK | ||
1921 | * receive_DataRequest (receive_RSDataRequest) we need to send back P_DATA | ||
1922 | * receive_Barrier_* we need to send a P_BARRIER_ACK | ||
1923 | */ | ||
1924 | static inline void inc_unacked(struct drbd_conf *mdev) | ||
1925 | { | ||
1926 | atomic_inc(&mdev->unacked_cnt); | ||
1927 | } | ||
1928 | |||
1929 | #define dec_unacked(mdev) do { \ | ||
1930 | typecheck(struct drbd_conf *, mdev); \ | ||
1931 | atomic_dec(&mdev->unacked_cnt); \ | ||
1932 | ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) | ||
1933 | |||
1934 | #define sub_unacked(mdev, n) do { \ | ||
1935 | typecheck(struct drbd_conf *, mdev); \ | ||
1936 | atomic_sub(n, &mdev->unacked_cnt); \ | ||
1937 | ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) | ||
1938 | |||
1939 | |||
1940 | static inline void put_net_conf(struct drbd_conf *mdev) | ||
1941 | { | ||
1942 | if (atomic_dec_and_test(&mdev->net_cnt)) | ||
1943 | wake_up(&mdev->misc_wait); | ||
1944 | } | ||
1945 | |||
1946 | /** | ||
1947 | * get_net_conf() - Increase ref count on mdev->net_conf; Returns 0 if nothing there | ||
1948 | * @mdev: DRBD device. | ||
1949 | * | ||
1950 | * You have to call put_net_conf() when finished working with mdev->net_conf. | ||
1951 | */ | ||
1952 | static inline int get_net_conf(struct drbd_conf *mdev) | ||
1953 | { | ||
1954 | int have_net_conf; | ||
1955 | |||
1956 | atomic_inc(&mdev->net_cnt); | ||
1957 | have_net_conf = mdev->state.conn >= C_UNCONNECTED; | ||
1958 | if (!have_net_conf) | ||
1959 | put_net_conf(mdev); | ||
1960 | return have_net_conf; | ||
1961 | } | ||
1962 | |||
1963 | /** | ||
1964 | * get_ldev() - Increase the ref count on mdev->ldev. Returns 0 if there is no ldev | ||
1965 | * @M: DRBD device. | ||
1966 | * | ||
1967 | * You have to call put_ldev() when finished working with mdev->ldev. | ||
1968 | */ | ||
1969 | #define get_ldev(M) __cond_lock(local, _get_ldev_if_state(M,D_INCONSISTENT)) | ||
1970 | #define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS)) | ||
1971 | |||
1972 | static inline void put_ldev(struct drbd_conf *mdev) | ||
1973 | { | ||
1974 | __release(local); | ||
1975 | if (atomic_dec_and_test(&mdev->local_cnt)) | ||
1976 | wake_up(&mdev->misc_wait); | ||
1977 | D_ASSERT(atomic_read(&mdev->local_cnt) >= 0); | ||
1978 | } | ||
1979 | |||
1980 | #ifndef __CHECKER__ | ||
1981 | static inline int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) | ||
1982 | { | ||
1983 | int io_allowed; | ||
1984 | |||
1985 | atomic_inc(&mdev->local_cnt); | ||
1986 | io_allowed = (mdev->state.disk >= mins); | ||
1987 | if (!io_allowed) | ||
1988 | put_ldev(mdev); | ||
1989 | return io_allowed; | ||
1990 | } | ||
1991 | #else | ||
1992 | extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins); | ||
1993 | #endif | ||
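/* Note on the intended pattern (drbd_kick_lo() further down is a real
 * instance of it): any access to mdev->ldev is bracketed by a successful
 * get_ldev()/get_ldev_if_state() and a matching put_ldev(), e.g. (sketch):
 *
 *	if (get_ldev(mdev)) {
 *		... use mdev->ldev ...
 *		put_ldev(mdev);
 *	}
 */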
1994 | |||
1995 | /* you must have a "get_ldev" reference */ | ||
1996 | static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, | ||
1997 | unsigned long *bits_left, unsigned int *per_mil_done) | ||
1998 | { | ||
1999 | /* | ||
2000 | * this is to break it at compile time when we change that | ||
2001 | * (we may feel 4TB maximum storage per drbd is not enough) | ||
2002 | */ | ||
2003 | typecheck(unsigned long, mdev->rs_total); | ||
2004 | |||
2005 | /* note: both rs_total and rs_left are in bits, i.e. in | ||
2006 | * units of BM_BLOCK_SIZE. | ||
2007 | * for the percentage, we don't care. */ | ||
2008 | |||
2009 | *bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; | ||
2010 | /* >> 10 to prevent overflow, | ||
2011 | * +1 to prevent division by zero */ | ||
2012 | if (*bits_left > mdev->rs_total) { | ||
2013 | /* doh. maybe a logic bug somewhere. | ||
2014 | * may also be just a race condition | ||
2015 | * between this and a disconnect during sync. | ||
2016 | * for now, just prevent in-kernel buffer overflow. | ||
2017 | */ | ||
2018 | smp_rmb(); | ||
2019 | dev_warn(DEV, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n", | ||
2020 | drbd_conn_str(mdev->state.conn), | ||
2021 | *bits_left, mdev->rs_total, mdev->rs_failed); | ||
2022 | *per_mil_done = 0; | ||
2023 | } else { | ||
2024 | /* make sure the calculation happens in long context */ | ||
2025 | unsigned long tmp = 1000UL - | ||
2026 | (*bits_left >> 10)*1000UL | ||
2027 | / ((mdev->rs_total >> 10) + 1UL); | ||
2028 | *per_mil_done = tmp; | ||
2029 | } | ||
2030 | } | ||
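/*
 * Worked example of the scaling above: with rs_total = 1<<20 bits and
 * bits_left = 1<<18 (a quarter still to go), the >>10 shifts give
 * 1000 - (256 * 1000) / (1024 + 1) = 1000 - 249 = 751, i.e. roughly 75.1%
 * done; the "+1" guarding against division by zero only costs this small
 * bias.
 */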
2031 | |||
2032 | |||
2033 | /* this throttles on-the-fly application requests | ||
2034 | * according to max_buffers settings; | ||
2035 | * maybe re-implement using semaphores? */ | ||
2036 | static inline int drbd_get_max_buffers(struct drbd_conf *mdev) | ||
2037 | { | ||
2038 | int mxb = 1000000; /* arbitrary limit on open requests */ | ||
2039 | if (get_net_conf(mdev)) { | ||
2040 | mxb = mdev->net_conf->max_buffers; | ||
2041 | put_net_conf(mdev); | ||
2042 | } | ||
2043 | return mxb; | ||
2044 | } | ||
2045 | |||
2046 | static inline int drbd_state_is_stable(union drbd_state s) | ||
2047 | { | ||
2048 | |||
2049 | /* DO NOT add a default clause, we want the compiler to warn us | ||
2050 | * for any newly introduced state we may have forgotten to add here */ | ||
2051 | |||
2052 | switch ((enum drbd_conns)s.conn) { | ||
2053 | /* new io only accepted when there is no connection, ... */ | ||
2054 | case C_STANDALONE: | ||
2055 | case C_WF_CONNECTION: | ||
2056 | /* ... or there is a well established connection. */ | ||
2057 | case C_CONNECTED: | ||
2058 | case C_SYNC_SOURCE: | ||
2059 | case C_SYNC_TARGET: | ||
2060 | case C_VERIFY_S: | ||
2061 | case C_VERIFY_T: | ||
2062 | case C_PAUSED_SYNC_S: | ||
2063 | case C_PAUSED_SYNC_T: | ||
2064 | /* maybe stable, look at the disk state */ | ||
2065 | break; | ||
2066 | |||
2067 | /* no new io accepted during transitional states | ||
2068 | * like handshake or teardown */ | ||
2069 | case C_DISCONNECTING: | ||
2070 | case C_UNCONNECTED: | ||
2071 | case C_TIMEOUT: | ||
2072 | case C_BROKEN_PIPE: | ||
2073 | case C_NETWORK_FAILURE: | ||
2074 | case C_PROTOCOL_ERROR: | ||
2075 | case C_TEAR_DOWN: | ||
2076 | case C_WF_REPORT_PARAMS: | ||
2077 | case C_STARTING_SYNC_S: | ||
2078 | case C_STARTING_SYNC_T: | ||
2079 | case C_WF_BITMAP_S: | ||
2080 | case C_WF_BITMAP_T: | ||
2081 | case C_WF_SYNC_UUID: | ||
2082 | case C_MASK: | ||
2083 | /* not "stable" */ | ||
2084 | return 0; | ||
2085 | } | ||
2086 | |||
2087 | switch ((enum drbd_disk_state)s.disk) { | ||
2088 | case D_DISKLESS: | ||
2089 | case D_INCONSISTENT: | ||
2090 | case D_OUTDATED: | ||
2091 | case D_CONSISTENT: | ||
2092 | case D_UP_TO_DATE: | ||
2093 | /* disk state is stable as well. */ | ||
2094 | break; | ||
2095 | |||
2096 | /* no new io accepted during transitional states */ | ||
2097 | case D_ATTACHING: | ||
2098 | case D_FAILED: | ||
2099 | case D_NEGOTIATING: | ||
2100 | case D_UNKNOWN: | ||
2101 | case D_MASK: | ||
2102 | /* not "stable" */ | ||
2103 | return 0; | ||
2104 | } | ||
2105 | |||
2106 | return 1; | ||
2107 | } | ||
2108 | |||
2109 | static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) | ||
2110 | { | ||
2111 | int mxb = drbd_get_max_buffers(mdev); | ||
2112 | |||
2113 | if (mdev->state.susp) | ||
2114 | return 0; | ||
2115 | if (test_bit(SUSPEND_IO, &mdev->flags)) | ||
2116 | return 0; | ||
2117 | |||
2118 | /* to avoid potential deadlock or bitmap corruption, | ||
2119 | * in various places, we only allow new application io | ||
2120 | * to start during "stable" states. */ | ||
2121 | |||
2122 | /* no new io accepted when attaching or detaching the disk */ | ||
2123 | if (!drbd_state_is_stable(mdev->state)) | ||
2124 | return 0; | ||
2125 | |||
2126 | /* since some older kernels don't have atomic_add_unless, | ||
2127 | * and we are within the spinlock anyways, we have this workaround. */ | ||
2128 | if (atomic_read(&mdev->ap_bio_cnt) > mxb) | ||
2129 | return 0; | ||
2130 | if (test_bit(BITMAP_IO, &mdev->flags)) | ||
2131 | return 0; | ||
2132 | return 1; | ||
2133 | } | ||
2134 | |||
2135 | /* I'd like to use wait_event_lock_irq, | ||
2136 | * but I'm not sure when it got introduced, | ||
2137 | * and not sure since when it takes 3 or 4 arguments */ | ||
2138 | static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two) | ||
2139 | { | ||
2140 | /* compare with after_state_ch, | ||
2141 | * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */ | ||
2142 | DEFINE_WAIT(wait); | ||
2143 | |||
2144 | /* we wait here | ||
2145 | * as long as the device is suspended, | ||
2146 | * while the bitmap is still on the fly during the connection | ||
2147 | * handshake, or as long as we would exceed the max_buffer limit. | ||
2148 | * | ||
2149 | * to avoid races with the reconnect code, | ||
2150 | * we need to atomic_inc within the spinlock. */ | ||
2151 | |||
2152 | spin_lock_irq(&mdev->req_lock); | ||
2153 | while (!__inc_ap_bio_cond(mdev)) { | ||
2154 | prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
2155 | spin_unlock_irq(&mdev->req_lock); | ||
2156 | schedule(); | ||
2157 | finish_wait(&mdev->misc_wait, &wait); | ||
2158 | spin_lock_irq(&mdev->req_lock); | ||
2159 | } | ||
2160 | atomic_add(one_or_two, &mdev->ap_bio_cnt); | ||
2161 | spin_unlock_irq(&mdev->req_lock); | ||
2162 | } | ||
2163 | |||
2164 | static inline void dec_ap_bio(struct drbd_conf *mdev) | ||
2165 | { | ||
2166 | int mxb = drbd_get_max_buffers(mdev); | ||
2167 | int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt); | ||
2168 | |||
2169 | D_ASSERT(ap_bio >= 0); | ||
2170 | /* this currently does wake_up for every dec_ap_bio! | ||
2171 | * maybe rather introduce some type of hysteresis? | ||
2172 | * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */ | ||
2173 | if (ap_bio < mxb) | ||
2174 | wake_up(&mdev->misc_wait); | ||
2175 | if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { | ||
2176 | if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) | ||
2177 | drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); | ||
2178 | } | ||
2179 | } | ||
2180 | |||
2181 | static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) | ||
2182 | { | ||
2183 | mdev->ed_uuid = val; | ||
2184 | } | ||
2185 | |||
2186 | static inline int seq_cmp(u32 a, u32 b) | ||
2187 | { | ||
2188 | /* we assume wrap around at 32bit. | ||
2189 | * for wrap around at 24bit (old atomic_t), | ||
2190 | * we'd have to | ||
2191 | * a <<= 8; b <<= 8; | ||
2192 | */ | ||
2193 | return (s32)(a) - (s32)(b); | ||
2194 | } | ||
2195 | #define seq_lt(a, b) (seq_cmp((a), (b)) < 0) | ||
2196 | #define seq_gt(a, b) (seq_cmp((a), (b)) > 0) | ||
2197 | #define seq_ge(a, b) (seq_cmp((a), (b)) >= 0) | ||
2198 | #define seq_le(a, b) (seq_cmp((a), (b)) <= 0) | ||
2199 | /* CAUTION: please no side effects in arguments! */ | ||
2200 | #define seq_max(a, b) ((u32)(seq_gt((a), (b)) ? (a) : (b))) | ||
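/*
 * Worked example of the wrap-around handling: seq_gt(2, 0xfffffffe)
 * computes (s32)2 - (s32)0xfffffffe = 2 - (-2) = 4 > 0, so a sequence
 * number that has just wrapped past 2^32 still compares as newer than one
 * shortly before the wrap, as long as the two values stay within 2^31 of
 * each other.
 */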
2201 | |||
2202 | static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq) | ||
2203 | { | ||
2204 | unsigned int m; | ||
2205 | spin_lock(&mdev->peer_seq_lock); | ||
2206 | m = seq_max(mdev->peer_seq, new_seq); | ||
2207 | mdev->peer_seq = m; | ||
2208 | spin_unlock(&mdev->peer_seq_lock); | ||
2209 | if (m == new_seq) | ||
2210 | wake_up(&mdev->seq_wait); | ||
2211 | } | ||
2212 | |||
2213 | static inline void drbd_update_congested(struct drbd_conf *mdev) | ||
2214 | { | ||
2215 | struct sock *sk = mdev->data.socket->sk; | ||
2216 | if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) | ||
2217 | set_bit(NET_CONGESTED, &mdev->flags); | ||
2218 | } | ||
2219 | |||
2220 | static inline int drbd_queue_order_type(struct drbd_conf *mdev) | ||
2221 | { | ||
2222 | /* sorry, we currently have no working implementation | ||
2223 | * of distributed TCQ stuff */ | ||
2224 | #ifndef QUEUE_ORDERED_NONE | ||
2225 | #define QUEUE_ORDERED_NONE 0 | ||
2226 | #endif | ||
2227 | return QUEUE_ORDERED_NONE; | ||
2228 | } | ||
2229 | |||
2230 | static inline void drbd_blk_run_queue(struct request_queue *q) | ||
2231 | { | ||
2232 | if (q && q->unplug_fn) | ||
2233 | q->unplug_fn(q); | ||
2234 | } | ||
2235 | |||
2236 | static inline void drbd_kick_lo(struct drbd_conf *mdev) | ||
2237 | { | ||
2238 | if (get_ldev(mdev)) { | ||
2239 | drbd_blk_run_queue(bdev_get_queue(mdev->ldev->backing_bdev)); | ||
2240 | put_ldev(mdev); | ||
2241 | } | ||
2242 | } | ||
2243 | |||
2244 | static inline void drbd_md_flush(struct drbd_conf *mdev) | ||
2245 | { | ||
2246 | int r; | ||
2247 | |||
2248 | if (test_bit(MD_NO_BARRIER, &mdev->flags)) | ||
2249 | return; | ||
2250 | |||
2251 | r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL); | ||
2252 | if (r) { | ||
2253 | set_bit(MD_NO_BARRIER, &mdev->flags); | ||
2254 | dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); | ||
2255 | } | ||
2256 | } | ||
2257 | |||
2258 | #endif | ||