Diffstat (limited to 'drivers/block/drbd/drbd_int.h')
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 2252 |
1 file changed, 2252 insertions(+), 0 deletions(-)
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
new file mode 100644
index 000000000000..2312d782fe99
--- /dev/null
+++ b/drivers/block/drbd/drbd_int.h
@@ -0,0 +1,2252 @@ | |||
1 | /* | ||
2 | drbd_int.h | ||
3 | |||
4 | This file is part of DRBD by Philipp Reisner and Lars Ellenberg. | ||
5 | |||
6 | Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. | ||
7 | Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. | ||
8 | Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. | ||
9 | |||
10 | drbd is free software; you can redistribute it and/or modify | ||
11 | it under the terms of the GNU General Public License as published by | ||
12 | the Free Software Foundation; either version 2, or (at your option) | ||
13 | any later version. | ||
14 | |||
15 | drbd is distributed in the hope that it will be useful, | ||
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | GNU General Public License for more details. | ||
19 | |||
20 | You should have received a copy of the GNU General Public License | ||
21 | along with drbd; see the file COPYING. If not, write to | ||
22 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
23 | |||
24 | */ | ||
25 | |||
26 | #ifndef _DRBD_INT_H | ||
27 | #define _DRBD_INT_H | ||
28 | |||
29 | #include <linux/compiler.h> | ||
30 | #include <linux/types.h> | ||
31 | #include <linux/version.h> | ||
32 | #include <linux/list.h> | ||
33 | #include <linux/sched.h> | ||
34 | #include <linux/bitops.h> | ||
35 | #include <linux/slab.h> | ||
36 | #include <linux/crypto.h> | ||
37 | #include <linux/ratelimit.h> | ||
38 | #include <linux/tcp.h> | ||
39 | #include <linux/mutex.h> | ||
40 | #include <linux/major.h> | ||
41 | #include <linux/blkdev.h> | ||
42 | #include <linux/genhd.h> | ||
43 | #include <net/tcp.h> | ||
44 | #include <linux/lru_cache.h> | ||
45 | |||
46 | #ifdef __CHECKER__ | ||
47 | # define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) | ||
48 | # define __protected_read_by(x) __attribute__((require_context(x,1,999,"read"))) | ||
49 | # define __protected_write_by(x) __attribute__((require_context(x,1,999,"write"))) | ||
50 | # define __must_hold(x) __attribute__((context(x,1,1), require_context(x,1,999,"call"))) | ||
51 | #else | ||
52 | # define __protected_by(x) | ||
53 | # define __protected_read_by(x) | ||
54 | # define __protected_write_by(x) | ||
55 | # define __must_hold(x) | ||
56 | #endif | ||
57 | |||
58 | #define __no_warn(lock, stmt) do { __acquire(lock); stmt; __release(lock); } while (0) | ||
59 | |||
60 | /* module parameter, defined in drbd_main.c */ | ||
61 | extern unsigned int minor_count; | ||
62 | extern int disable_sendpage; | ||
63 | extern int allow_oos; | ||
64 | extern unsigned int cn_idx; | ||
65 | |||
66 | #ifdef CONFIG_DRBD_FAULT_INJECTION | ||
67 | extern int enable_faults; | ||
68 | extern int fault_rate; | ||
69 | extern int fault_devs; | ||
70 | #endif | ||
71 | |||
72 | extern char usermode_helper[]; | ||
73 | |||
74 | |||
75 | #ifndef TRUE | ||
76 | #define TRUE 1 | ||
77 | #endif | ||
78 | #ifndef FALSE | ||
79 | #define FALSE 0 | ||
80 | #endif | ||
81 | |||
82 | /* I don't remember why XCPU ... | ||
83 | * This is used to wake the asender, | ||
84 | * and to interrupt the sending task | ||
85 | * on disconnect. | ||
86 | */ | ||
87 | #define DRBD_SIG SIGXCPU | ||
88 | |||
89 | /* This is used to stop/restart our threads. | ||
90 | * Cannot use SIGTERM nor SIGKILL, since these | ||
91 | * are sent out by init on runlevel changes. | ||
92 | * I choose SIGHUP for now. | ||
93 | */ | ||
94 | #define DRBD_SIGKILL SIGHUP | ||
95 | |||
96 | /* All EEs on the free list should have ID_VACANT (== 0); | ||
97 | * freshly allocated EEs get !ID_VACANT (== 1), | ||
98 | * so if it says "cannot dereference null pointer at address 0x00000001", | ||
99 | * it is most likely one of these :( */ | ||
100 | |||
101 | #define ID_IN_SYNC (4711ULL) | ||
102 | #define ID_OUT_OF_SYNC (4712ULL) | ||
103 | |||
104 | #define ID_SYNCER (-1ULL) | ||
105 | #define ID_VACANT 0 | ||
106 | #define is_syncer_block_id(id) ((id) == ID_SYNCER) | ||
107 | |||
108 | struct drbd_conf; | ||
109 | |||
110 | |||
111 | /* to shorten dev_warn(DEV, "msg"); and relatives statements */ | ||
112 | #define DEV (disk_to_dev(mdev->vdisk)) | ||
113 | |||
114 | #define D_ASSERT(exp) if (!(exp)) \ | ||
115 | dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__) | ||
116 | |||
117 | #define ERR_IF(exp) if (({ \ | ||
118 | int _b = (exp) != 0; \ | ||
119 | if (_b) dev_err(DEV, "%s: (%s) in %s:%d\n", \ | ||
120 | __func__, #exp, __FILE__, __LINE__); \ | ||
121 | _b; \ | ||
122 | })) | ||
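/* Illustrative usage sketch, not part of this patch: D_ASSERT() only logs,
 * while ERR_IF() logs and also evaluates to the tested condition, so it can
 * guard an early bail-out:
 *
 *	D_ASSERT(list_empty(&mdev->done_ee));
 *	ERR_IF (size == 0) return 0;
 */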
123 | |||
124 | /* Defines to control fault insertion */ | ||
125 | enum { | ||
126 | DRBD_FAULT_MD_WR = 0, /* meta data write */ | ||
127 | DRBD_FAULT_MD_RD = 1, /* read */ | ||
128 | DRBD_FAULT_RS_WR = 2, /* resync */ | ||
129 | DRBD_FAULT_RS_RD = 3, | ||
130 | DRBD_FAULT_DT_WR = 4, /* data */ | ||
131 | DRBD_FAULT_DT_RD = 5, | ||
132 | DRBD_FAULT_DT_RA = 6, /* data read ahead */ | ||
133 | DRBD_FAULT_BM_ALLOC = 7, /* bitmap allocation */ | ||
134 | DRBD_FAULT_AL_EE = 8, /* alloc ee */ | ||
135 | |||
136 | DRBD_FAULT_MAX, | ||
137 | }; | ||
138 | |||
139 | #ifdef CONFIG_DRBD_FAULT_INJECTION | ||
140 | extern unsigned int | ||
141 | _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type); | ||
142 | static inline int | ||
143 | drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { | ||
144 | return fault_rate && | ||
145 | (enable_faults & (1<<type)) && | ||
146 | _drbd_insert_fault(mdev, type); | ||
147 | } | ||
148 | #define FAULT_ACTIVE(_m, _t) (drbd_insert_fault((_m), (_t))) | ||
149 | |||
150 | #else | ||
151 | #define FAULT_ACTIVE(_m, _t) (0) | ||
152 | #endif | ||
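/* Illustrative sketch, not part of this patch: the intended use of
 * FAULT_ACTIVE() is to check it right before submitting real I/O and fail
 * the request artificially instead, e.g.
 *
 *	if (FAULT_ACTIVE(mdev, DRBD_FAULT_DT_WR))
 *		bio_endio(bio, -EIO);
 *	else
 *		generic_make_request(bio);
 *
 * Which fault classes may fire is selected via the enable_faults bitmask and
 * the fault_rate/fault_devs module parameters declared above.
 */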
153 | |||
154 | /* integer division, round _UP_ to the next integer */ | ||
155 | #define div_ceil(A, B) ((A)/(B) + ((A)%(B) ? 1 : 0)) | ||
156 | /* usual integer division */ | ||
157 | #define div_floor(A, B) ((A)/(B)) | ||
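/* Example, not part of this patch: div_ceil(9, 4) == 3 while div_floor(9, 4) == 2;
 * both expect non-negative integer arguments. */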
158 | |||
159 | /* drbd_meta-data.c (still in drbd_main.c) */ | ||
160 | /* 4th incarnation of the disk layout. */ | ||
161 | #define DRBD_MD_MAGIC (DRBD_MAGIC+4) | ||
162 | |||
163 | extern struct drbd_conf **minor_table; | ||
164 | extern struct ratelimit_state drbd_ratelimit_state; | ||
165 | |||
166 | /* on the wire */ | ||
167 | enum drbd_packets { | ||
168 | /* receiver (data socket) */ | ||
169 | P_DATA = 0x00, | ||
170 | P_DATA_REPLY = 0x01, /* Response to P_DATA_REQUEST */ | ||
171 | P_RS_DATA_REPLY = 0x02, /* Response to P_RS_DATA_REQUEST */ | ||
172 | P_BARRIER = 0x03, | ||
173 | P_BITMAP = 0x04, | ||
174 | P_BECOME_SYNC_TARGET = 0x05, | ||
175 | P_BECOME_SYNC_SOURCE = 0x06, | ||
176 | P_UNPLUG_REMOTE = 0x07, /* Used at various times to hint the peer */ | ||
177 | P_DATA_REQUEST = 0x08, /* Used to ask for a data block */ | ||
178 | P_RS_DATA_REQUEST = 0x09, /* Used to ask for a data block for resync */ | ||
179 | P_SYNC_PARAM = 0x0a, | ||
180 | P_PROTOCOL = 0x0b, | ||
181 | P_UUIDS = 0x0c, | ||
182 | P_SIZES = 0x0d, | ||
183 | P_STATE = 0x0e, | ||
184 | P_SYNC_UUID = 0x0f, | ||
185 | P_AUTH_CHALLENGE = 0x10, | ||
186 | P_AUTH_RESPONSE = 0x11, | ||
187 | P_STATE_CHG_REQ = 0x12, | ||
188 | |||
189 | /* asender (meta socket) */ | ||
190 | P_PING = 0x13, | ||
191 | P_PING_ACK = 0x14, | ||
192 | P_RECV_ACK = 0x15, /* Used in protocol B */ | ||
193 | P_WRITE_ACK = 0x16, /* Used in protocol C */ | ||
194 | P_RS_WRITE_ACK = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */ | ||
195 | P_DISCARD_ACK = 0x18, /* Used in proto C, two-primaries conflict detection */ | ||
196 | P_NEG_ACK = 0x19, /* Sent if local disk is unusable */ | ||
197 | P_NEG_DREPLY = 0x1a, /* Local disk is broken... */ | ||
198 | P_NEG_RS_DREPLY = 0x1b, /* Local disk is broken... */ | ||
199 | P_BARRIER_ACK = 0x1c, | ||
200 | P_STATE_CHG_REPLY = 0x1d, | ||
201 | |||
202 | /* "new" commands, no longer fitting into the ordering scheme above */ | ||
203 | |||
204 | P_OV_REQUEST = 0x1e, /* data socket */ | ||
205 | P_OV_REPLY = 0x1f, | ||
206 | P_OV_RESULT = 0x20, /* meta socket */ | ||
207 | P_CSUM_RS_REQUEST = 0x21, /* data socket */ | ||
208 | P_RS_IS_IN_SYNC = 0x22, /* meta socket */ | ||
209 | P_SYNC_PARAM89 = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */ | ||
210 | P_COMPRESSED_BITMAP = 0x24, /* compressed or otherwise encoded bitmap transfer */ | ||
211 | |||
212 | P_MAX_CMD = 0x25, | ||
213 | P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ | ||
214 | P_MAX_OPT_CMD = 0x101, | ||
215 | |||
216 | /* special command ids for handshake */ | ||
217 | |||
218 | P_HAND_SHAKE_M = 0xfff1, /* First Packet on the MetaSock */ | ||
219 | P_HAND_SHAKE_S = 0xfff2, /* First Packet on the Socket */ | ||
220 | |||
221 | P_HAND_SHAKE = 0xfffe /* FIXED for the next century! */ | ||
222 | }; | ||
223 | |||
224 | static inline const char *cmdname(enum drbd_packets cmd) | ||
225 | { | ||
226 | /* THINK may need to become several global tables | ||
227 | * when we want to support more than | ||
228 | * one PRO_VERSION */ | ||
229 | static const char *cmdnames[] = { | ||
230 | [P_DATA] = "Data", | ||
231 | [P_DATA_REPLY] = "DataReply", | ||
232 | [P_RS_DATA_REPLY] = "RSDataReply", | ||
233 | [P_BARRIER] = "Barrier", | ||
234 | [P_BITMAP] = "ReportBitMap", | ||
235 | [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget", | ||
236 | [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource", | ||
237 | [P_UNPLUG_REMOTE] = "UnplugRemote", | ||
238 | [P_DATA_REQUEST] = "DataRequest", | ||
239 | [P_RS_DATA_REQUEST] = "RSDataRequest", | ||
240 | [P_SYNC_PARAM] = "SyncParam", | ||
241 | [P_SYNC_PARAM89] = "SyncParam89", | ||
242 | [P_PROTOCOL] = "ReportProtocol", | ||
243 | [P_UUIDS] = "ReportUUIDs", | ||
244 | [P_SIZES] = "ReportSizes", | ||
245 | [P_STATE] = "ReportState", | ||
246 | [P_SYNC_UUID] = "ReportSyncUUID", | ||
247 | [P_AUTH_CHALLENGE] = "AuthChallenge", | ||
248 | [P_AUTH_RESPONSE] = "AuthResponse", | ||
249 | [P_PING] = "Ping", | ||
250 | [P_PING_ACK] = "PingAck", | ||
251 | [P_RECV_ACK] = "RecvAck", | ||
252 | [P_WRITE_ACK] = "WriteAck", | ||
253 | [P_RS_WRITE_ACK] = "RSWriteAck", | ||
254 | [P_DISCARD_ACK] = "DiscardAck", | ||
255 | [P_NEG_ACK] = "NegAck", | ||
256 | [P_NEG_DREPLY] = "NegDReply", | ||
257 | [P_NEG_RS_DREPLY] = "NegRSDReply", | ||
258 | [P_BARRIER_ACK] = "BarrierAck", | ||
259 | [P_STATE_CHG_REQ] = "StateChgRequest", | ||
260 | [P_STATE_CHG_REPLY] = "StateChgReply", | ||
261 | [P_OV_REQUEST] = "OVRequest", | ||
262 | [P_OV_REPLY] = "OVReply", | ||
263 | [P_OV_RESULT] = "OVResult", | ||
264 | [P_MAX_CMD] = NULL, | ||
265 | }; | ||
266 | |||
267 | if (cmd == P_HAND_SHAKE_M) | ||
268 | return "HandShakeM"; | ||
269 | if (cmd == P_HAND_SHAKE_S) | ||
270 | return "HandShakeS"; | ||
271 | if (cmd == P_HAND_SHAKE) | ||
272 | return "HandShake"; | ||
273 | if (cmd >= P_MAX_CMD) | ||
274 | return "Unknown"; | ||
275 | return cmdnames[cmd]; | ||
276 | } | ||
277 | |||
278 | /* for sending/receiving the bitmap, | ||
279 | * possibly in some encoding scheme */ | ||
280 | struct bm_xfer_ctx { | ||
281 | /* "const" | ||
282 | * stores total bits and long words | ||
283 | * of the bitmap, so we don't need to | ||
284 | * call the accessor functions over and again. */ | ||
285 | unsigned long bm_bits; | ||
286 | unsigned long bm_words; | ||
287 | /* during xfer, current position within the bitmap */ | ||
288 | unsigned long bit_offset; | ||
289 | unsigned long word_offset; | ||
290 | |||
291 | /* statistics; index: (h->command == P_BITMAP) */ | ||
292 | unsigned packets[2]; | ||
293 | unsigned bytes[2]; | ||
294 | }; | ||
295 | |||
296 | extern void INFO_bm_xfer_stats(struct drbd_conf *mdev, | ||
297 | const char *direction, struct bm_xfer_ctx *c); | ||
298 | |||
299 | static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c) | ||
300 | { | ||
301 | /* word_offset counts "native long words" (32 or 64 bit), | ||
302 | * aligned at 64 bit. | ||
303 | * Encoded packet may end at an unaligned bit offset. | ||
304 | * In case a fallback clear text packet is transmitted in | ||
305 | * between, we adjust this offset back to the last 64bit | ||
306 | * aligned "native long word", which makes coding and decoding | ||
307 | * the plain text bitmap much more convenient. */ | ||
308 | #if BITS_PER_LONG == 64 | ||
309 | c->word_offset = c->bit_offset >> 6; | ||
310 | #elif BITS_PER_LONG == 32 | ||
311 | c->word_offset = c->bit_offset >> 5; | ||
312 | c->word_offset &= ~(1UL); | ||
313 | #else | ||
314 | # error "unsupported BITS_PER_LONG" | ||
315 | #endif | ||
316 | } | ||
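/* Worked example, not part of this patch: with BITS_PER_LONG == 32 a
 * bit_offset of 100 yields word_offset 100 >> 5 == 3, which is then rounded
 * down to the even (64-bit aligned) word 2.  On 64-bit the result is simply
 * 100 >> 6 == 1. */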
317 | |||
318 | #ifndef __packed | ||
319 | #define __packed __attribute__((packed)) | ||
320 | #endif | ||
321 | |||
322 | /* This is the layout for a packet on the wire. | ||
323 | * The byteorder is the network byte order. | ||
324 | * (except block_id and barrier fields. | ||
325 | * these are pointers to local structs | ||
326 | * and have no relevance for the partner, | ||
327 | * which just echoes them as received.) | ||
328 | * | ||
329 | * NOTE that the payload starts at a long aligned offset, | ||
330 | * regardless of 32 or 64 bit arch! | ||
331 | */ | ||
332 | struct p_header { | ||
333 | u32 magic; | ||
334 | u16 command; | ||
335 | u16 length; /* bytes of data after this header */ | ||
336 | u8 payload[0]; | ||
337 | } __packed; | ||
338 | /* 8 bytes. packet FIXED for the next century! */ | ||
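/* Sketch, not part of this patch, of how a sender would fill in such a header
 * before pushing a packet onto the wire (along the lines of _drbd_send_cmd(),
 * declared further down):
 *
 *	h->magic   = cpu_to_be32(DRBD_MAGIC);
 *	h->command = cpu_to_be16(cmd);
 *	h->length  = cpu_to_be16(size - sizeof(struct p_header));
 *
 * "length" therefore counts only the payload bytes following the 8 byte header.
 */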
339 | |||
340 | /* | ||
341 | * short commands, packets without payload, plain p_header: | ||
342 | * P_PING | ||
343 | * P_PING_ACK | ||
344 | * P_BECOME_SYNC_TARGET | ||
345 | * P_BECOME_SYNC_SOURCE | ||
346 | * P_UNPLUG_REMOTE | ||
347 | */ | ||
348 | |||
349 | /* | ||
350 | * commands with out-of-struct payload: | ||
351 | * P_BITMAP (no additional fields) | ||
352 | * P_DATA, P_DATA_REPLY (see p_data) | ||
353 | * P_COMPRESSED_BITMAP (see receive_compressed_bitmap) | ||
354 | */ | ||
355 | |||
356 | /* these defines must not be changed without changing the protocol version */ | ||
357 | #define DP_HARDBARRIER 1 | ||
358 | #define DP_RW_SYNC 2 | ||
359 | #define DP_MAY_SET_IN_SYNC 4 | ||
360 | |||
361 | struct p_data { | ||
362 | struct p_header head; | ||
363 | u64 sector; /* 64 bits sector number */ | ||
364 | u64 block_id; /* to identify the request in protocol B&C */ | ||
365 | u32 seq_num; | ||
366 | u32 dp_flags; | ||
367 | } __packed; | ||
368 | |||
369 | /* | ||
370 | * commands which share a struct: | ||
371 | * p_block_ack: | ||
372 | * P_RECV_ACK (proto B), P_WRITE_ACK (proto C), | ||
373 | * P_DISCARD_ACK (proto C, two-primaries conflict detection) | ||
374 | * p_block_req: | ||
375 | * P_DATA_REQUEST, P_RS_DATA_REQUEST | ||
376 | */ | ||
377 | struct p_block_ack { | ||
378 | struct p_header head; | ||
379 | u64 sector; | ||
380 | u64 block_id; | ||
381 | u32 blksize; | ||
382 | u32 seq_num; | ||
383 | } __packed; | ||
384 | |||
385 | |||
386 | struct p_block_req { | ||
387 | struct p_header head; | ||
388 | u64 sector; | ||
389 | u64 block_id; | ||
390 | u32 blksize; | ||
391 | u32 pad; /* to multiple of 8 Byte */ | ||
392 | } __packed; | ||
393 | |||
394 | /* | ||
395 | * commands with their own struct for additional fields: | ||
396 | * P_HAND_SHAKE | ||
397 | * P_BARRIER | ||
398 | * P_BARRIER_ACK | ||
399 | * P_SYNC_PARAM | ||
400 | * ReportParams | ||
401 | */ | ||
402 | |||
403 | struct p_handshake { | ||
404 | struct p_header head; /* 8 bytes */ | ||
405 | u32 protocol_min; | ||
406 | u32 feature_flags; | ||
407 | u32 protocol_max; | ||
408 | |||
409 | /* should be more than enough for future enhancements | ||
410 | * for now, feature_flags and the reserverd array shall be zero. | ||
411 | */ | ||
412 | |||
413 | u32 _pad; | ||
414 | u64 reserverd[7]; | ||
415 | } __packed; | ||
416 | /* 80 bytes, FIXED for the next century */ | ||
417 | |||
418 | struct p_barrier { | ||
419 | struct p_header head; | ||
420 | u32 barrier; /* barrier number _handle_ only */ | ||
421 | u32 pad; /* to multiple of 8 Byte */ | ||
422 | } __packed; | ||
423 | |||
424 | struct p_barrier_ack { | ||
425 | struct p_header head; | ||
426 | u32 barrier; | ||
427 | u32 set_size; | ||
428 | } __packed; | ||
429 | |||
430 | struct p_rs_param { | ||
431 | struct p_header head; | ||
432 | u32 rate; | ||
433 | |||
434 | /* Since protocol version 88 and higher. */ | ||
435 | char verify_alg[0]; | ||
436 | } __packed; | ||
437 | |||
438 | struct p_rs_param_89 { | ||
439 | struct p_header head; | ||
440 | u32 rate; | ||
441 | /* protocol version 89: */ | ||
442 | char verify_alg[SHARED_SECRET_MAX]; | ||
443 | char csums_alg[SHARED_SECRET_MAX]; | ||
444 | } __packed; | ||
445 | |||
446 | struct p_protocol { | ||
447 | struct p_header head; | ||
448 | u32 protocol; | ||
449 | u32 after_sb_0p; | ||
450 | u32 after_sb_1p; | ||
451 | u32 after_sb_2p; | ||
452 | u32 want_lose; | ||
453 | u32 two_primaries; | ||
454 | |||
455 | /* Since protocol version 87 and higher. */ | ||
456 | char integrity_alg[0]; | ||
457 | |||
458 | } __packed; | ||
459 | |||
460 | struct p_uuids { | ||
461 | struct p_header head; | ||
462 | u64 uuid[UI_EXTENDED_SIZE]; | ||
463 | } __packed; | ||
464 | |||
465 | struct p_rs_uuid { | ||
466 | struct p_header head; | ||
467 | u64 uuid; | ||
468 | } __packed; | ||
469 | |||
470 | struct p_sizes { | ||
471 | struct p_header head; | ||
472 | u64 d_size; /* size of disk */ | ||
473 | u64 u_size; /* user requested size */ | ||
474 | u64 c_size; /* current exported size */ | ||
475 | u32 max_segment_size; /* Maximal size of a BIO */ | ||
476 | u32 queue_order_type; | ||
477 | } __packed; | ||
478 | |||
479 | struct p_state { | ||
480 | struct p_header head; | ||
481 | u32 state; | ||
482 | } __packed; | ||
483 | |||
484 | struct p_req_state { | ||
485 | struct p_header head; | ||
486 | u32 mask; | ||
487 | u32 val; | ||
488 | } __packed; | ||
489 | |||
490 | struct p_req_state_reply { | ||
491 | struct p_header head; | ||
492 | u32 retcode; | ||
493 | } __packed; | ||
494 | |||
495 | struct p_drbd06_param { | ||
496 | u64 size; | ||
497 | u32 state; | ||
498 | u32 blksize; | ||
499 | u32 protocol; | ||
500 | u32 version; | ||
501 | u32 gen_cnt[5]; | ||
502 | u32 bit_map_gen[5]; | ||
503 | } __packed; | ||
504 | |||
505 | struct p_discard { | ||
506 | struct p_header head; | ||
507 | u64 block_id; | ||
508 | u32 seq_num; | ||
509 | u32 pad; | ||
510 | } __packed; | ||
511 | |||
512 | /* Valid values for the encoding field. | ||
513 | * Bump proto version when changing this. */ | ||
514 | enum drbd_bitmap_code { | ||
515 | /* RLE_VLI_Bytes = 0, | ||
516 | * and other bit variants had been defined during | ||
517 | * algorithm evaluation. */ | ||
518 | RLE_VLI_Bits = 2, | ||
519 | }; | ||
520 | |||
521 | struct p_compressed_bm { | ||
522 | struct p_header head; | ||
523 | /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code | ||
524 | * (encoding & 0x80): polarity (set/unset) of first runlength | ||
525 | * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits | ||
526 | * used to pad up to head.length bytes | ||
527 | */ | ||
528 | u8 encoding; | ||
529 | |||
530 | u8 code[0]; | ||
531 | } __packed; | ||
532 | |||
533 | /* DCBP: Drbd Compressed Bitmap Packet ... */ | ||
534 | static inline enum drbd_bitmap_code | ||
535 | DCBP_get_code(struct p_compressed_bm *p) | ||
536 | { | ||
537 | return (enum drbd_bitmap_code)(p->encoding & 0x0f); | ||
538 | } | ||
539 | |||
540 | static inline void | ||
541 | DCBP_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code) | ||
542 | { | ||
543 | BUG_ON(code & ~0xf); | ||
544 | p->encoding = (p->encoding & ~0xf) | code; | ||
545 | } | ||
546 | |||
547 | static inline int | ||
548 | DCBP_get_start(struct p_compressed_bm *p) | ||
549 | { | ||
550 | return (p->encoding & 0x80) != 0; | ||
551 | } | ||
552 | |||
553 | static inline void | ||
554 | DCBP_set_start(struct p_compressed_bm *p, int set) | ||
555 | { | ||
556 | p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0); | ||
557 | } | ||
558 | |||
559 | static inline int | ||
560 | DCBP_get_pad_bits(struct p_compressed_bm *p) | ||
561 | { | ||
562 | return (p->encoding >> 4) & 0x7; | ||
563 | } | ||
564 | |||
565 | static inline void | ||
566 | DCBP_set_pad_bits(struct p_compressed_bm *p, int n) | ||
567 | { | ||
568 | BUG_ON(n & ~0x7); | ||
569 | p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4); | ||
570 | } | ||
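/* Example, not part of this patch: an encoding byte of 0xb2 decodes as
 * code == RLE_VLI_Bits (low nibble 0x2), pad_bits == 3 ((0xb2 >> 4) & 0x7),
 * and the 0x80 polarity flag for the first run length set. */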
571 | |||
572 | /* one bitmap packet, including the p_header, | ||
573 | * should fit within one _architecture independent_ page. | ||
574 | * so we use the fixed 4KiB page size | ||
575 | * most architectures have used for a long time. | ||
576 | */ | ||
577 | #define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header)) | ||
578 | #define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long)) | ||
579 | #define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm)) | ||
580 | #if (PAGE_SIZE < 4096) | ||
581 | /* drbd_send_bitmap / receive_bitmap would break horribly */ | ||
582 | #error "PAGE_SIZE too small" | ||
583 | #endif | ||
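/* Resulting sizes, for illustration (not part of this patch):
 * BM_PACKET_PAYLOAD_BYTES == 4096 - 8 == 4088 bytes, i.e. BM_PACKET_WORDS is
 * 511 long words on 64-bit and 1022 on 32-bit architectures. */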
584 | |||
585 | union p_polymorph { | ||
586 | struct p_header header; | ||
587 | struct p_handshake handshake; | ||
588 | struct p_data data; | ||
589 | struct p_block_ack block_ack; | ||
590 | struct p_barrier barrier; | ||
591 | struct p_barrier_ack barrier_ack; | ||
592 | struct p_rs_param_89 rs_param_89; | ||
593 | struct p_protocol protocol; | ||
594 | struct p_sizes sizes; | ||
595 | struct p_uuids uuids; | ||
596 | struct p_state state; | ||
597 | struct p_req_state req_state; | ||
598 | struct p_req_state_reply req_state_reply; | ||
599 | struct p_block_req block_req; | ||
600 | } __packed; | ||
601 | |||
602 | /**********************************************************************/ | ||
603 | enum drbd_thread_state { | ||
604 | None, | ||
605 | Running, | ||
606 | Exiting, | ||
607 | Restarting | ||
608 | }; | ||
609 | |||
610 | struct drbd_thread { | ||
611 | spinlock_t t_lock; | ||
612 | struct task_struct *task; | ||
613 | struct completion stop; | ||
614 | enum drbd_thread_state t_state; | ||
615 | int (*function) (struct drbd_thread *); | ||
616 | struct drbd_conf *mdev; | ||
617 | int reset_cpu_mask; | ||
618 | }; | ||
619 | |||
620 | static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) | ||
621 | { | ||
622 | /* THINK testing the t_state seems to be uncritical in all cases | ||
623 | * (but thread_{start,stop}), so we can read it *without* the lock. | ||
624 | * --lge */ | ||
625 | |||
626 | smp_rmb(); | ||
627 | return thi->t_state; | ||
628 | } | ||
629 | |||
630 | |||
631 | /* | ||
632 | * Having this as the first member of a struct provides sort of "inheritance". | ||
633 | * "derived" structs can be "drbd_queue_work()"ed. | ||
634 | * The callback should know and cast back to the descendant struct. | ||
635 | * drbd_request and drbd_epoch_entry are descendants of drbd_work. | ||
636 | */ | ||
637 | struct drbd_work; | ||
638 | typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel); | ||
639 | struct drbd_work { | ||
640 | struct list_head list; | ||
641 | drbd_work_cb cb; | ||
642 | }; | ||
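/* Illustrative sketch, not part of this patch, of the "inheritance" described
 * above: a hypothetical derived work item and its callback.
 *
 *	struct example_work {
 *		struct drbd_work w;	(must stay the first member)
 *		int arg;
 *	};
 *
 *	static int example_work_cb(struct drbd_conf *mdev,
 *				   struct drbd_work *w, int cancel)
 *	{
 *		struct example_work *ew = container_of(w, struct example_work, w);
 *		dev_info(DEV, "example work ran: arg=%d cancel=%d\n", ew->arg, cancel);
 *		return 1;
 *	}
 *
 * Such an item would set ew->w.cb = example_work_cb and be handed to the
 * worker via the drbd_queue_work() helper defined later in this header.
 */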
643 | |||
644 | struct drbd_tl_epoch; | ||
645 | struct drbd_request { | ||
646 | struct drbd_work w; | ||
647 | struct drbd_conf *mdev; | ||
648 | |||
649 | /* if local IO is not allowed, will be NULL. | ||
650 | * if local IO _is_ allowed, holds the locally submitted bio clone, | ||
651 | * or, after local IO completion, the ERR_PTR(error). | ||
652 | * see drbd_endio_pri(). */ | ||
653 | struct bio *private_bio; | ||
654 | |||
655 | struct hlist_node colision; | ||
656 | sector_t sector; | ||
657 | unsigned int size; | ||
658 | unsigned int epoch; /* barrier_nr */ | ||
659 | |||
660 | /* barrier_nr: used to check on "completion" whether this req was in | ||
661 | * the current epoch, and we therefore have to close it, | ||
662 | * starting a new epoch... | ||
663 | */ | ||
664 | |||
665 | /* up to here, the struct layout is identical to drbd_epoch_entry; | ||
666 | * we might be able to use that to our advantage... */ | ||
667 | |||
668 | struct list_head tl_requests; /* ring list in the transfer log */ | ||
669 | struct bio *master_bio; /* master bio pointer */ | ||
670 | unsigned long rq_state; /* see comments above _req_mod() */ | ||
671 | int seq_num; | ||
672 | unsigned long start_time; | ||
673 | }; | ||
674 | |||
675 | struct drbd_tl_epoch { | ||
676 | struct drbd_work w; | ||
677 | struct list_head requests; /* requests before */ | ||
678 | struct drbd_tl_epoch *next; /* pointer to the next barrier */ | ||
679 | unsigned int br_number; /* the barriers identifier. */ | ||
680 | int n_req; /* number of requests attached before this barrier */ | ||
681 | }; | ||
682 | |||
683 | struct drbd_request; | ||
684 | |||
685 | /* These Tl_epoch_entries may be in one of 6 lists: | ||
686 | active_ee .. data packet being written | ||
687 | sync_ee .. syncer block being written | ||
688 | done_ee .. block written, need to send P_WRITE_ACK | ||
689 | read_ee .. [RS]P_DATA_REQUEST being read | ||
690 | */ | ||
691 | |||
692 | struct drbd_epoch { | ||
693 | struct list_head list; | ||
694 | unsigned int barrier_nr; | ||
695 | atomic_t epoch_size; /* increased on every request added. */ | ||
696 | atomic_t active; /* increased on every req. added, and dec on every finished. */ | ||
697 | unsigned long flags; | ||
698 | }; | ||
699 | |||
700 | /* drbd_epoch flag bits */ | ||
701 | enum { | ||
702 | DE_BARRIER_IN_NEXT_EPOCH_ISSUED, | ||
703 | DE_BARRIER_IN_NEXT_EPOCH_DONE, | ||
704 | DE_CONTAINS_A_BARRIER, | ||
705 | DE_HAVE_BARRIER_NUMBER, | ||
706 | DE_IS_FINISHING, | ||
707 | }; | ||
708 | |||
709 | enum epoch_event { | ||
710 | EV_PUT, | ||
711 | EV_GOT_BARRIER_NR, | ||
712 | EV_BARRIER_DONE, | ||
713 | EV_BECAME_LAST, | ||
714 | EV_CLEANUP = 32, /* used as flag */ | ||
715 | }; | ||
716 | |||
717 | struct drbd_epoch_entry { | ||
718 | struct drbd_work w; | ||
719 | struct drbd_conf *mdev; | ||
720 | struct bio *private_bio; | ||
721 | struct hlist_node colision; | ||
722 | sector_t sector; | ||
723 | unsigned int size; | ||
724 | struct drbd_epoch *epoch; | ||
725 | |||
726 | /* up to here, the struct layout is identical to drbd_request; | ||
727 | * we might be able to use that to our advantage... */ | ||
728 | |||
729 | unsigned int flags; | ||
730 | u64 block_id; | ||
731 | }; | ||
732 | |||
733 | struct drbd_wq_barrier { | ||
734 | struct drbd_work w; | ||
735 | struct completion done; | ||
736 | }; | ||
737 | |||
738 | struct digest_info { | ||
739 | int digest_size; | ||
740 | void *digest; | ||
741 | }; | ||
742 | |||
743 | /* ee flag bits */ | ||
744 | enum { | ||
745 | __EE_CALL_AL_COMPLETE_IO, | ||
746 | __EE_CONFLICT_PENDING, | ||
747 | __EE_MAY_SET_IN_SYNC, | ||
748 | __EE_IS_BARRIER, | ||
749 | }; | ||
750 | #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) | ||
751 | #define EE_CONFLICT_PENDING (1<<__EE_CONFLICT_PENDING) | ||
752 | #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) | ||
753 | #define EE_IS_BARRIER (1<<__EE_IS_BARRIER) | ||
754 | |||
755 | /* global flag bits */ | ||
756 | enum { | ||
757 | CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */ | ||
758 | SIGNAL_ASENDER, /* whether asender wants to be interrupted */ | ||
759 | SEND_PING, /* whether asender should send a ping asap */ | ||
760 | |||
761 | STOP_SYNC_TIMER, /* tell timer to cancel itself */ | ||
762 | UNPLUG_QUEUED, /* only relevant with kernel 2.4 */ | ||
763 | UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ | ||
764 | MD_DIRTY, /* current uuids and flags not yet on disk */ | ||
765 | DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ | ||
766 | USE_DEGR_WFC_T, /* degr-wfc-timeout instead of wfc-timeout. */ | ||
767 | CLUSTER_ST_CHANGE, /* Cluster wide state change going on... */ | ||
768 | CL_ST_CHG_SUCCESS, | ||
769 | CL_ST_CHG_FAIL, | ||
770 | CRASHED_PRIMARY, /* This node was a crashed primary. | ||
771 | * Gets cleared when the state.conn | ||
772 | * goes into C_CONNECTED state. */ | ||
773 | WRITE_BM_AFTER_RESYNC, /* A kmalloc() during resync failed */ | ||
774 | NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */ | ||
775 | CONSIDER_RESYNC, | ||
776 | |||
777 | MD_NO_BARRIER, /* meta data device does not support barriers, | ||
778 | so don't even try */ | ||
779 | SUSPEND_IO, /* suspend application io */ | ||
780 | BITMAP_IO, /* suspend application io; | ||
781 | once no more io in flight, start bitmap io */ | ||
782 | BITMAP_IO_QUEUED, /* Started bitmap IO */ | ||
783 | RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ | ||
784 | NET_CONGESTED, /* The data socket is congested */ | ||
785 | |||
786 | CONFIG_PENDING, /* serialization of (re)configuration requests. | ||
787 | * if set, also prevents the device from dying */ | ||
788 | DEVICE_DYING, /* device became unconfigured, | ||
789 | * but worker thread is still handling the cleanup. | ||
790 | * reconfiguring (nl_disk_conf, nl_net_conf) is disallowed, | ||
791 | * while this is set. */ | ||
792 | RESIZE_PENDING, /* Size change detected locally, waiting for the response from | ||
793 | * the peer, if it changed there as well. */ | ||
794 | }; | ||
795 | |||
796 | struct drbd_bitmap; /* opaque for drbd_conf */ | ||
797 | |||
798 | /* TODO sort members for performance | ||
799 | * MAYBE group them further */ | ||
800 | |||
801 | /* THINK maybe we actually want to use the default "event/%s" worker threads | ||
802 | * or similar in linux 2.6, which uses per cpu data and threads. | ||
803 | * | ||
804 | * To be general, this might need a spin_lock member. | ||
805 | * For now, please use the mdev->req_lock to protect list_head, | ||
806 | * see drbd_queue_work below. | ||
807 | */ | ||
808 | struct drbd_work_queue { | ||
809 | struct list_head q; | ||
810 | struct semaphore s; /* producers up it, worker down()s it */ | ||
811 | spinlock_t q_lock; /* to protect the list. */ | ||
812 | }; | ||
813 | |||
814 | struct drbd_socket { | ||
815 | struct drbd_work_queue work; | ||
816 | struct mutex mutex; | ||
817 | struct socket *socket; | ||
818 | /* this way we get our | ||
819 | * send/receive buffers off the stack */ | ||
820 | union p_polymorph sbuf; | ||
821 | union p_polymorph rbuf; | ||
822 | }; | ||
823 | |||
824 | struct drbd_md { | ||
825 | u64 md_offset; /* sector offset to 'super' block */ | ||
826 | |||
827 | u64 la_size_sect; /* last agreed size, unit sectors */ | ||
828 | u64 uuid[UI_SIZE]; | ||
829 | u64 device_uuid; | ||
830 | u32 flags; | ||
831 | u32 md_size_sect; | ||
832 | |||
833 | s32 al_offset; /* signed relative sector offset to al area */ | ||
834 | s32 bm_offset; /* signed relative sector offset to bitmap */ | ||
835 | |||
836 | /* u32 al_nr_extents; important for restoring the AL | ||
837 | * is stored into sync_conf.al_extents, which in turn | ||
838 | * gets applied to act_log->nr_elements | ||
839 | */ | ||
840 | }; | ||
841 | |||
842 | /* for sync_conf and other types... */ | ||
843 | #define NL_PACKET(name, number, fields) struct name { fields }; | ||
844 | #define NL_INTEGER(pn,pr,member) int member; | ||
845 | #define NL_INT64(pn,pr,member) __u64 member; | ||
846 | #define NL_BIT(pn,pr,member) unsigned member:1; | ||
847 | #define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len; | ||
848 | #include "linux/drbd_nl.h" | ||
849 | |||
850 | struct drbd_backing_dev { | ||
851 | struct block_device *backing_bdev; | ||
852 | struct block_device *md_bdev; | ||
853 | struct file *lo_file; | ||
854 | struct file *md_file; | ||
855 | struct drbd_md md; | ||
856 | struct disk_conf dc; /* The user provided config... */ | ||
857 | sector_t known_size; /* last known size of that backing device */ | ||
858 | }; | ||
859 | |||
860 | struct drbd_md_io { | ||
861 | struct drbd_conf *mdev; | ||
862 | struct completion event; | ||
863 | int error; | ||
864 | }; | ||
865 | |||
866 | struct bm_io_work { | ||
867 | struct drbd_work w; | ||
868 | char *why; | ||
869 | int (*io_fn)(struct drbd_conf *mdev); | ||
870 | void (*done)(struct drbd_conf *mdev, int rv); | ||
871 | }; | ||
872 | |||
873 | enum write_ordering_e { | ||
874 | WO_none, | ||
875 | WO_drain_io, | ||
876 | WO_bdev_flush, | ||
877 | WO_bio_barrier | ||
878 | }; | ||
879 | |||
880 | struct drbd_conf { | ||
881 | /* things that are stored as / read from meta data on disk */ | ||
882 | unsigned long flags; | ||
883 | |||
884 | /* configured by drbdsetup */ | ||
885 | struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ | ||
886 | struct syncer_conf sync_conf; | ||
887 | struct drbd_backing_dev *ldev __protected_by(local); | ||
888 | |||
889 | sector_t p_size; /* partner's disk size */ | ||
890 | struct request_queue *rq_queue; | ||
891 | struct block_device *this_bdev; | ||
892 | struct gendisk *vdisk; | ||
893 | |||
894 | struct drbd_socket data; /* data/barrier/cstate/parameter packets */ | ||
895 | struct drbd_socket meta; /* ping/ack (metadata) packets */ | ||
896 | int agreed_pro_version; /* actually used protocol version */ | ||
897 | unsigned long last_received; /* in jiffies, either socket */ | ||
898 | unsigned int ko_count; | ||
899 | struct drbd_work resync_work, | ||
900 | unplug_work, | ||
901 | md_sync_work; | ||
902 | struct timer_list resync_timer; | ||
903 | struct timer_list md_sync_timer; | ||
904 | |||
905 | /* Used after attach while negotiating new disk state. */ | ||
906 | union drbd_state new_state_tmp; | ||
907 | |||
908 | union drbd_state state; | ||
909 | wait_queue_head_t misc_wait; | ||
910 | wait_queue_head_t state_wait; /* upon each state change. */ | ||
911 | unsigned int send_cnt; | ||
912 | unsigned int recv_cnt; | ||
913 | unsigned int read_cnt; | ||
914 | unsigned int writ_cnt; | ||
915 | unsigned int al_writ_cnt; | ||
916 | unsigned int bm_writ_cnt; | ||
917 | atomic_t ap_bio_cnt; /* Requests we need to complete */ | ||
918 | atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */ | ||
919 | atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ | ||
920 | atomic_t unacked_cnt; /* Need to send replies for */ | ||
921 | atomic_t local_cnt; /* Waiting for local completion */ | ||
922 | atomic_t net_cnt; /* Users of net_conf */ | ||
923 | spinlock_t req_lock; | ||
924 | struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */ | ||
925 | struct drbd_tl_epoch *newest_tle; | ||
926 | struct drbd_tl_epoch *oldest_tle; | ||
927 | struct list_head out_of_sequence_requests; | ||
928 | struct hlist_head *tl_hash; | ||
929 | unsigned int tl_hash_s; | ||
930 | |||
931 | /* blocks to sync in this run [unit BM_BLOCK_SIZE] */ | ||
932 | unsigned long rs_total; | ||
933 | /* number of sync IOs that failed in this run */ | ||
934 | unsigned long rs_failed; | ||
935 | /* Syncer's start time [unit jiffies] */ | ||
936 | unsigned long rs_start; | ||
937 | /* cumulated time in PausedSyncX state [unit jiffies] */ | ||
938 | unsigned long rs_paused; | ||
939 | /* block not up-to-date at mark [unit BM_BLOCK_SIZE] */ | ||
940 | unsigned long rs_mark_left; | ||
941 | /* marks's time [unit jiffies] */ | ||
942 | unsigned long rs_mark_time; | ||
943 | /* skipped because csum was equal [unit BM_BLOCK_SIZE] */ | ||
944 | unsigned long rs_same_csum; | ||
945 | |||
946 | /* where does the admin want us to start? (sector) */ | ||
947 | sector_t ov_start_sector; | ||
948 | /* where are we now? (sector) */ | ||
949 | sector_t ov_position; | ||
950 | /* Start sector of out of sync range (to merge printk reporting). */ | ||
951 | sector_t ov_last_oos_start; | ||
952 | /* size of out-of-sync range in sectors. */ | ||
953 | sector_t ov_last_oos_size; | ||
954 | unsigned long ov_left; /* in bits */ | ||
955 | struct crypto_hash *csums_tfm; | ||
956 | struct crypto_hash *verify_tfm; | ||
957 | |||
958 | struct drbd_thread receiver; | ||
959 | struct drbd_thread worker; | ||
960 | struct drbd_thread asender; | ||
961 | struct drbd_bitmap *bitmap; | ||
962 | unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */ | ||
963 | |||
964 | /* Used to track operations of resync... */ | ||
965 | struct lru_cache *resync; | ||
966 | /* Number of locked elements in resync LRU */ | ||
967 | unsigned int resync_locked; | ||
968 | /* resync extent number waiting for application requests */ | ||
969 | unsigned int resync_wenr; | ||
970 | |||
971 | int open_cnt; | ||
972 | u64 *p_uuid; | ||
973 | struct drbd_epoch *current_epoch; | ||
974 | spinlock_t epoch_lock; | ||
975 | unsigned int epochs; | ||
976 | enum write_ordering_e write_ordering; | ||
977 | struct list_head active_ee; /* IO in progress */ | ||
978 | struct list_head sync_ee; /* IO in progress */ | ||
979 | struct list_head done_ee; /* send ack */ | ||
980 | struct list_head read_ee; /* IO in progress */ | ||
981 | struct list_head net_ee; /* zero-copy network send in progress */ | ||
982 | struct hlist_head *ee_hash; /* is protected by req_lock! */ | ||
983 | unsigned int ee_hash_s; | ||
984 | |||
985 | /* this one is protected by ee_lock, single thread */ | ||
986 | struct drbd_epoch_entry *last_write_w_barrier; | ||
987 | |||
988 | int next_barrier_nr; | ||
989 | struct hlist_head *app_reads_hash; /* is protected by req_lock */ | ||
990 | struct list_head resync_reads; | ||
991 | atomic_t pp_in_use; | ||
992 | wait_queue_head_t ee_wait; | ||
993 | struct page *md_io_page; /* one page buffer for md_io */ | ||
994 | struct page *md_io_tmpp; /* for logical_block_size != 512 */ | ||
995 | struct mutex md_io_mutex; /* protects the md_io_buffer */ | ||
996 | spinlock_t al_lock; | ||
997 | wait_queue_head_t al_wait; | ||
998 | struct lru_cache *act_log; /* activity log */ | ||
999 | unsigned int al_tr_number; | ||
1000 | int al_tr_cycle; | ||
1001 | int al_tr_pos; /* position of the next transaction in the journal */ | ||
1002 | struct crypto_hash *cram_hmac_tfm; | ||
1003 | struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */ | ||
1004 | struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ | ||
1005 | void *int_dig_out; | ||
1006 | void *int_dig_in; | ||
1007 | void *int_dig_vv; | ||
1008 | wait_queue_head_t seq_wait; | ||
1009 | atomic_t packet_seq; | ||
1010 | unsigned int peer_seq; | ||
1011 | spinlock_t peer_seq_lock; | ||
1012 | unsigned int minor; | ||
1013 | unsigned long comm_bm_set; /* communicated number of set bits. */ | ||
1014 | cpumask_var_t cpu_mask; | ||
1015 | struct bm_io_work bm_io_work; | ||
1016 | u64 ed_uuid; /* UUID of the exposed data */ | ||
1017 | struct mutex state_mutex; | ||
1018 | char congestion_reason; /* Why we were congested... */ | ||
1019 | }; | ||
1020 | |||
1021 | static inline struct drbd_conf *minor_to_mdev(unsigned int minor) | ||
1022 | { | ||
1023 | struct drbd_conf *mdev; | ||
1024 | |||
1025 | mdev = minor < minor_count ? minor_table[minor] : NULL; | ||
1026 | |||
1027 | return mdev; | ||
1028 | } | ||
1029 | |||
1030 | static inline unsigned int mdev_to_minor(struct drbd_conf *mdev) | ||
1031 | { | ||
1032 | return mdev->minor; | ||
1033 | } | ||
1034 | |||
1035 | /* returns 1 if it was successful, | ||
1036 | * returns 0 if there was no data socket. | ||
1037 | * so wherever you are going to use the data.socket, e.g. do | ||
1038 | * if (!drbd_get_data_sock(mdev)) | ||
1039 | * return 0; | ||
1040 | * CODE(); | ||
1041 | * drbd_put_data_sock(mdev); | ||
1042 | */ | ||
1043 | static inline int drbd_get_data_sock(struct drbd_conf *mdev) | ||
1044 | { | ||
1045 | mutex_lock(&mdev->data.mutex); | ||
1046 | /* drbd_disconnect() could have called drbd_free_sock() | ||
1047 | * while we were waiting in down()... */ | ||
1048 | if (unlikely(mdev->data.socket == NULL)) { | ||
1049 | mutex_unlock(&mdev->data.mutex); | ||
1050 | return 0; | ||
1051 | } | ||
1052 | return 1; | ||
1053 | } | ||
1054 | |||
1055 | static inline void drbd_put_data_sock(struct drbd_conf *mdev) | ||
1056 | { | ||
1057 | mutex_unlock(&mdev->data.mutex); | ||
1058 | } | ||
1059 | |||
1060 | /* | ||
1061 | * function declarations | ||
1062 | *************************/ | ||
1063 | |||
1064 | /* drbd_main.c */ | ||
1065 | |||
1066 | enum chg_state_flags { | ||
1067 | CS_HARD = 1, | ||
1068 | CS_VERBOSE = 2, | ||
1069 | CS_WAIT_COMPLETE = 4, | ||
1070 | CS_SERIALIZE = 8, | ||
1071 | CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, | ||
1072 | }; | ||
1073 | |||
1074 | extern void drbd_init_set_defaults(struct drbd_conf *mdev); | ||
1075 | extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, | ||
1076 | union drbd_state mask, union drbd_state val); | ||
1077 | extern void drbd_force_state(struct drbd_conf *, union drbd_state, | ||
1078 | union drbd_state); | ||
1079 | extern int _drbd_request_state(struct drbd_conf *, union drbd_state, | ||
1080 | union drbd_state, enum chg_state_flags); | ||
1081 | extern int __drbd_set_state(struct drbd_conf *, union drbd_state, | ||
1082 | enum chg_state_flags, struct completion *done); | ||
1083 | extern void print_st_err(struct drbd_conf *, union drbd_state, | ||
1084 | union drbd_state, int); | ||
1085 | extern int drbd_thread_start(struct drbd_thread *thi); | ||
1086 | extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); | ||
1087 | #ifdef CONFIG_SMP | ||
1088 | extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev); | ||
1089 | extern void drbd_calc_cpu_mask(struct drbd_conf *mdev); | ||
1090 | #else | ||
1091 | #define drbd_thread_current_set_cpu(A) ({}) | ||
1092 | #define drbd_calc_cpu_mask(A) ({}) | ||
1093 | #endif | ||
1094 | extern void drbd_free_resources(struct drbd_conf *mdev); | ||
1095 | extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, | ||
1096 | unsigned int set_size); | ||
1097 | extern void tl_clear(struct drbd_conf *mdev); | ||
1098 | extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); | ||
1099 | extern void drbd_free_sock(struct drbd_conf *mdev); | ||
1100 | extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, | ||
1101 | void *buf, size_t size, unsigned msg_flags); | ||
1102 | extern int drbd_send_protocol(struct drbd_conf *mdev); | ||
1103 | extern int drbd_send_uuids(struct drbd_conf *mdev); | ||
1104 | extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); | ||
1105 | extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val); | ||
1106 | extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply); | ||
1107 | extern int _drbd_send_state(struct drbd_conf *mdev); | ||
1108 | extern int drbd_send_state(struct drbd_conf *mdev); | ||
1109 | extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, | ||
1110 | enum drbd_packets cmd, struct p_header *h, | ||
1111 | size_t size, unsigned msg_flags); | ||
1112 | #define USE_DATA_SOCKET 1 | ||
1113 | #define USE_META_SOCKET 0 | ||
1114 | extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, | ||
1115 | enum drbd_packets cmd, struct p_header *h, | ||
1116 | size_t size); | ||
1117 | extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, | ||
1118 | char *data, size_t size); | ||
1119 | extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); | ||
1120 | extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, | ||
1121 | u32 set_size); | ||
1122 | extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, | ||
1123 | struct drbd_epoch_entry *e); | ||
1124 | extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, | ||
1125 | struct p_block_req *rp); | ||
1126 | extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, | ||
1127 | struct p_data *dp); | ||
1128 | extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, | ||
1129 | sector_t sector, int blksize, u64 block_id); | ||
1130 | extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, | ||
1131 | struct drbd_epoch_entry *e); | ||
1132 | extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); | ||
1133 | extern int _drbd_send_barrier(struct drbd_conf *mdev, | ||
1134 | struct drbd_tl_epoch *barrier); | ||
1135 | extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, | ||
1136 | sector_t sector, int size, u64 block_id); | ||
1137 | extern int drbd_send_drequest_csum(struct drbd_conf *mdev, | ||
1138 | sector_t sector, int size, | ||
1139 | void *digest, int digest_size, | ||
1140 | enum drbd_packets cmd); | ||
1141 | extern int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size); | ||
1142 | |||
1143 | extern int drbd_send_bitmap(struct drbd_conf *mdev); | ||
1144 | extern int _drbd_send_bitmap(struct drbd_conf *mdev); | ||
1145 | extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode); | ||
1146 | extern void drbd_free_bc(struct drbd_backing_dev *ldev); | ||
1147 | extern void drbd_mdev_cleanup(struct drbd_conf *mdev); | ||
1148 | |||
1149 | /* drbd_meta-data.c (still in drbd_main.c) */ | ||
1150 | extern void drbd_md_sync(struct drbd_conf *mdev); | ||
1151 | extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); | ||
1152 | /* maybe define them below as inline? */ | ||
1153 | extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); | ||
1154 | extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); | ||
1155 | extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); | ||
1156 | extern void _drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); | ||
1157 | extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local); | ||
1158 | extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local); | ||
1159 | extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local); | ||
1160 | extern int drbd_md_test_flag(struct drbd_backing_dev *, int); | ||
1161 | extern void drbd_md_mark_dirty(struct drbd_conf *mdev); | ||
1162 | extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, | ||
1163 | int (*io_fn)(struct drbd_conf *), | ||
1164 | void (*done)(struct drbd_conf *, int), | ||
1165 | char *why); | ||
1166 | extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); | ||
1167 | extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); | ||
1168 | extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why); | ||
1169 | |||
1170 | |||
1171 | /* Meta data layout | ||
1172 | We reserve a 128MB Block (4k aligned) | ||
1173 | * either at the end of the backing device | ||
1174 | * or on a separate meta data device. */ | ||
1175 | |||
1176 | #define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */ | ||
1177 | /* The following numbers are sectors */ | ||
1178 | #define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */ | ||
1179 | #define MD_AL_MAX_SIZE 64 /* = 32 kb LOG ~ 3776 extents ~ 14 GB Storage */ | ||
1180 | /* Allows up to about 3.8TB */ | ||
1181 | #define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE) | ||
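/* Worked numbers, for illustration (not part of this patch): MD_RESERVED_SECT
 * is 128 << 11 == 262144 sectors == 128 MiB.  The activity log starts 8 sectors
 * into that area and may occupy up to 64 sectors (32 KiB), so MD_BM_OFFSET
 * puts the on-disk bitmap at sector 72 of the meta data area. */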
1182 | |||
1183 | /* Since the smallest IO unit is usually 512 bytes */ | ||
1184 | #define MD_SECTOR_SHIFT 9 | ||
1185 | #define MD_SECTOR_SIZE (1<<MD_SECTOR_SHIFT) | ||
1186 | |||
1187 | /* activity log */ | ||
1188 | #define AL_EXTENTS_PT ((MD_SECTOR_SIZE-12)/8-1) /* 61 ; Extents per 512B sector */ | ||
1189 | #define AL_EXTENT_SHIFT 22 /* One extent represents 4M Storage */ | ||
1190 | #define AL_EXTENT_SIZE (1<<AL_EXTENT_SHIFT) | ||
1191 | |||
1192 | #if BITS_PER_LONG == 32 | ||
1193 | #define LN2_BPL 5 | ||
1194 | #define cpu_to_lel(A) cpu_to_le32(A) | ||
1195 | #define lel_to_cpu(A) le32_to_cpu(A) | ||
1196 | #elif BITS_PER_LONG == 64 | ||
1197 | #define LN2_BPL 6 | ||
1198 | #define cpu_to_lel(A) cpu_to_le64(A) | ||
1199 | #define lel_to_cpu(A) le64_to_cpu(A) | ||
1200 | #else | ||
1201 | #error "LN2 of BITS_PER_LONG unknown!" | ||
1202 | #endif | ||
1203 | |||
1204 | /* resync bitmap */ | ||
1205 | /* 16MB sized 'bitmap extent' to track syncer usage */ | ||
1206 | struct bm_extent { | ||
1207 | int rs_left; /* number of bits set (out of sync) in this extent. */ | ||
1208 | int rs_failed; /* number of failed resync requests in this extent. */ | ||
1209 | unsigned long flags; | ||
1210 | struct lc_element lce; | ||
1211 | }; | ||
1212 | |||
1213 | #define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */ | ||
1214 | #define BME_LOCKED 1 /* bm_extent.flags: syncer active on this one. */ | ||
1215 | |||
1216 | /* drbd_bitmap.c */ | ||
1217 | /* | ||
1218 | * We need to store one bit for a block. | ||
1219 | * Example: 1GB disk @ 4096 byte blocks ==> we need 32 KB bitmap. | ||
1220 | * Bit 0 ==> local node thinks this block is binary identical on both nodes | ||
1221 | * Bit 1 ==> local node thinks this block needs to be synced. | ||
1222 | */ | ||
1223 | |||
1224 | #define BM_BLOCK_SHIFT 12 /* 4k per bit */ | ||
1225 | #define BM_BLOCK_SIZE (1<<BM_BLOCK_SHIFT) | ||
1226 | /* (9+3) : 512 bytes @ 8 bits; representing 16M storage | ||
1227 | * per sector of on disk bitmap */ | ||
1228 | #define BM_EXT_SHIFT (BM_BLOCK_SHIFT + MD_SECTOR_SHIFT + 3) /* = 24 */ | ||
1229 | #define BM_EXT_SIZE (1<<BM_EXT_SHIFT) | ||
1230 | |||
1231 | #if (BM_EXT_SHIFT != 24) || (BM_BLOCK_SHIFT != 12) | ||
1232 | #error "HAVE YOU FIXED drbdmeta AS WELL??" | ||
1233 | #endif | ||
1234 | |||
1235 | /* thus many _storage_ sectors are described by one bit */ | ||
1236 | #define BM_SECT_TO_BIT(x) ((x)>>(BM_BLOCK_SHIFT-9)) | ||
1237 | #define BM_BIT_TO_SECT(x) ((sector_t)(x)<<(BM_BLOCK_SHIFT-9)) | ||
1238 | #define BM_SECT_PER_BIT BM_BIT_TO_SECT(1) | ||
1239 | |||
1240 | /* bit to represented kilo byte conversion */ | ||
1241 | #define Bit2KB(bits) ((bits)<<(BM_BLOCK_SHIFT-10)) | ||
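/* Worked example, not part of this patch: one bit covers BM_BLOCK_SIZE ==
 * 4 KiB == 8 sectors of storage.  A 1 GiB device (2097152 sectors) therefore
 * needs BM_SECT_TO_BIT(2097152) == 262144 bits == 32 KiB of bitmap, matching
 * the example further up; Bit2KB(262144) == 1048576 converts back to the
 * amount of storage (in KB) those bits represent. */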
1242 | |||
1243 | /* in which _bitmap_ extent (resp. sector) the bit for a certain | ||
1244 | * _storage_ sector is located in */ | ||
1245 | #define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9)) | ||
1246 | |||
1247 | /* how much _storage_ sectors we have per bitmap sector */ | ||
1248 | #define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9)) | ||
1249 | #define BM_SECT_PER_EXT BM_EXT_TO_SECT(1) | ||
1250 | |||
1251 | /* in one sector of the bitmap, we have this many activity_log extents. */ | ||
1252 | #define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT)) | ||
1253 | #define BM_WORDS_PER_AL_EXT (1 << (AL_EXTENT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) | ||
1254 | |||
1255 | #define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT) | ||
1256 | #define BM_BLOCKS_PER_BM_EXT_MASK ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1) | ||
1257 | |||
1258 | /* the extent in "PER_EXTENT" below is an activity log extent | ||
1259 | * we need that many (long words/bytes) to store the bitmap | ||
1260 | * of one AL_EXTENT_SIZE chunk of storage. | ||
1261 | * we can store the bitmap for that many AL_EXTENTS within | ||
1262 | * one sector of the _on_disk_ bitmap: | ||
1263 | * bit 0 bit 37 bit 38 bit (512*8)-1 | ||
1264 | * ...|........|........|.. // ..|........| | ||
1265 | * sect. 0 `296 `304 ^(512*8*8)-1 | ||
1266 | * | ||
1267 | #define BM_WORDS_PER_EXT ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / BITS_PER_LONG ) | ||
1268 | #define BM_BYTES_PER_EXT ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / 8 ) // 128 | ||
1269 | #define BM_EXT_PER_SECT ( 512 / BM_BYTES_PER_EXTENT ) // 4 | ||
1270 | */ | ||
1271 | |||
1272 | #define DRBD_MAX_SECTORS_32 (0xffffffffLU) | ||
1273 | #define DRBD_MAX_SECTORS_BM \ | ||
1274 | ((MD_RESERVED_SECT - MD_BM_OFFSET) * (1LL<<(BM_EXT_SHIFT-9))) | ||
1275 | #if DRBD_MAX_SECTORS_BM < DRBD_MAX_SECTORS_32 | ||
1276 | #define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_BM | ||
1277 | #define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_BM | ||
1278 | #elif !defined(CONFIG_LBD) && BITS_PER_LONG == 32 | ||
1279 | #define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_32 | ||
1280 | #define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_32 | ||
1281 | #else | ||
1282 | #define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_BM | ||
1283 | /* 16 TB in units of sectors */ | ||
1284 | #if BITS_PER_LONG == 32 | ||
1285 | /* adjust by one page worth of bitmap, | ||
1286 | * so we won't wrap around in drbd_bm_find_next_bit. | ||
1287 | * you should use a 64bit OS for that much storage anyway. */ | ||
1288 | #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff) | ||
1289 | #else | ||
1290 | #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0x1LU << 32) | ||
1291 | #endif | ||
1292 | #endif | ||
1293 | |||
1294 | /* Sector shift value for the "hash" functions of tl_hash and ee_hash tables. | ||
1295 | * With a value of 6 all IO in one 32K block make it to the same slot of the | ||
1296 | * hash table. */ | ||
1297 | #define HT_SHIFT 6 | ||
1298 | #define DRBD_MAX_SEGMENT_SIZE (1U<<(9+HT_SHIFT)) | ||
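/* Illustrative note, not part of this patch: requests are hashed by
 * "sector >> HT_SHIFT", so with HT_SHIFT == 6 all sectors of an aligned
 * 64-sector (32 KiB) block land in the same slot; DRBD_MAX_SEGMENT_SIZE ==
 * 1U << 15 == 32 KiB caps the size of a single bio accordingly. */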
1299 | |||
1300 | /* Number of elements in the app_reads_hash */ | ||
1301 | #define APP_R_HSIZE 15 | ||
1302 | |||
1303 | extern int drbd_bm_init(struct drbd_conf *mdev); | ||
1304 | extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors); | ||
1305 | extern void drbd_bm_cleanup(struct drbd_conf *mdev); | ||
1306 | extern void drbd_bm_set_all(struct drbd_conf *mdev); | ||
1307 | extern void drbd_bm_clear_all(struct drbd_conf *mdev); | ||
1308 | extern int drbd_bm_set_bits( | ||
1309 | struct drbd_conf *mdev, unsigned long s, unsigned long e); | ||
1310 | extern int drbd_bm_clear_bits( | ||
1311 | struct drbd_conf *mdev, unsigned long s, unsigned long e); | ||
1312 | /* bm_set_bits variant for use while holding drbd_bm_lock */ | ||
1313 | extern void _drbd_bm_set_bits(struct drbd_conf *mdev, | ||
1314 | const unsigned long s, const unsigned long e); | ||
1315 | extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr); | ||
1316 | extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); | ||
1317 | extern int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local); | ||
1318 | extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); | ||
1319 | extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); | ||
1320 | extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, | ||
1321 | unsigned long al_enr); | ||
1322 | extern size_t drbd_bm_words(struct drbd_conf *mdev); | ||
1323 | extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); | ||
1324 | extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); | ||
1325 | extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); | ||
1326 | /* bm_find_next variants for use while you hold drbd_bm_lock() */ | ||
1327 | extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); | ||
1328 | extern unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo); | ||
1329 | extern unsigned long drbd_bm_total_weight(struct drbd_conf *mdev); | ||
1330 | extern int drbd_bm_rs_done(struct drbd_conf *mdev); | ||
1331 | /* for receive_bitmap */ | ||
1332 | extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, | ||
1333 | size_t number, unsigned long *buffer); | ||
1334 | /* for _drbd_send_bitmap and drbd_bm_write_sect */ | ||
1335 | extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, | ||
1336 | size_t number, unsigned long *buffer); | ||
1337 | |||
1338 | extern void drbd_bm_lock(struct drbd_conf *mdev, char *why); | ||
1339 | extern void drbd_bm_unlock(struct drbd_conf *mdev); | ||
1340 | |||
1341 | extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e); | ||
1342 | /* drbd_main.c */ | ||
1343 | |||
1344 | extern struct kmem_cache *drbd_request_cache; | ||
1345 | extern struct kmem_cache *drbd_ee_cache; /* epoch entries */ | ||
1346 | extern struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ | ||
1347 | extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ | ||
1348 | extern mempool_t *drbd_request_mempool; | ||
1349 | extern mempool_t *drbd_ee_mempool; | ||
1350 | |||
1351 | extern struct page *drbd_pp_pool; /* drbd's page pool */ | ||
1352 | extern spinlock_t drbd_pp_lock; | ||
1353 | extern int drbd_pp_vacant; | ||
1354 | extern wait_queue_head_t drbd_pp_wait; | ||
1355 | |||
1356 | extern rwlock_t global_state_lock; | ||
1357 | |||
1358 | extern struct drbd_conf *drbd_new_device(unsigned int minor); | ||
1359 | extern void drbd_free_mdev(struct drbd_conf *mdev); | ||
1360 | |||
1361 | extern int proc_details; | ||
1362 | |||
1363 | /* drbd_req */ | ||
1364 | extern int drbd_make_request_26(struct request_queue *q, struct bio *bio); | ||
1365 | extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); | ||
1366 | extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec); | ||
1367 | extern int is_valid_ar_handle(struct drbd_request *, sector_t); | ||
1368 | |||
1369 | |||
1370 | /* drbd_nl.c */ | ||
1371 | extern void drbd_suspend_io(struct drbd_conf *mdev); | ||
1372 | extern void drbd_resume_io(struct drbd_conf *mdev); | ||
1373 | extern char *ppsize(char *buf, unsigned long long size); | ||
1374 | extern sector_t drbd_new_dev_size(struct drbd_conf *, | ||
1375 | struct drbd_backing_dev *); | ||
1376 | enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; | ||
1377 | extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *) __must_hold(local); | ||
1378 | extern void resync_after_online_grow(struct drbd_conf *); | ||
1379 | extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); | ||
1380 | extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, | ||
1381 | int force); | ||
1382 | enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev); | ||
1383 | extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); | ||
1384 | |||
1385 | /* drbd_worker.c */ | ||
1386 | extern int drbd_worker(struct drbd_thread *thi); | ||
1387 | extern int drbd_alter_sa(struct drbd_conf *mdev, int na); | ||
1388 | extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side); | ||
1389 | extern void resume_next_sg(struct drbd_conf *mdev); | ||
1390 | extern void suspend_other_sg(struct drbd_conf *mdev); | ||
1391 | extern int drbd_resync_finished(struct drbd_conf *mdev); | ||
1392 | /* maybe rather drbd_main.c ? */ | ||
1393 | extern int drbd_md_sync_page_io(struct drbd_conf *mdev, | ||
1394 | struct drbd_backing_dev *bdev, sector_t sector, int rw); | ||
1395 | extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); | ||
1396 | |||
1397 | static inline void ov_oos_print(struct drbd_conf *mdev) | ||
1398 | { | ||
1399 | if (mdev->ov_last_oos_size) { | ||
1400 | dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n", | ||
1401 | (unsigned long long)mdev->ov_last_oos_start, | ||
1402 | (unsigned long)mdev->ov_last_oos_size); | ||
1403 | } | ||
1404 | mdev->ov_last_oos_size = 0; | ||
1405 | } | ||
1406 | |||
1407 | |||
1408 | extern void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); | ||
1409 | /* worker callbacks */ | ||
1410 | extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int); | ||
1411 | extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int); | ||
1412 | extern int w_e_end_data_req(struct drbd_conf *, struct drbd_work *, int); | ||
1413 | extern int w_e_end_rsdata_req(struct drbd_conf *, struct drbd_work *, int); | ||
1414 | extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int); | ||
1415 | extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int); | ||
1416 | extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); | ||
1417 | extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); | ||
1418 | extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int); | ||
1419 | extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); | ||
1420 | extern int w_io_error(struct drbd_conf *, struct drbd_work *, int); | ||
1421 | extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); | ||
1422 | extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int); | ||
1423 | extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); | ||
1424 | extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int); | ||
1425 | extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int); | ||
1426 | extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int); | ||
1427 | extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); | ||
1428 | |||
1429 | extern void resync_timer_fn(unsigned long data); | ||
1430 | |||
1431 | /* drbd_receiver.c */ | ||
1432 | extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); | ||
1433 | extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, | ||
1434 | u64 id, | ||
1435 | sector_t sector, | ||
1436 | unsigned int data_size, | ||
1437 | gfp_t gfp_mask) __must_hold(local); | ||
1438 | extern void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e); | ||
1439 | extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev, | ||
1440 | struct list_head *head); | ||
1441 | extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, | ||
1442 | struct list_head *head); | ||
1443 | extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); | ||
1444 | extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); | ||
1445 | extern void drbd_flush_workqueue(struct drbd_conf *mdev); | ||
1446 | |||
1447 | /* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to | ||
1448 | * mess with get_fs/set_fs, we know we are KERNEL_DS always. */ | ||
1449 | static inline int drbd_setsockopt(struct socket *sock, int level, int optname, | ||
1450 | char __user *optval, int optlen) | ||
1451 | { | ||
1452 | int err; | ||
1453 | if (level == SOL_SOCKET) | ||
1454 | err = sock_setsockopt(sock, level, optname, optval, optlen); | ||
1455 | else | ||
1456 | err = sock->ops->setsockopt(sock, level, optname, optval, | ||
1457 | optlen); | ||
1458 | return err; | ||
1459 | } | ||
1460 | |||
1461 | static inline void drbd_tcp_cork(struct socket *sock) | ||
1462 | { | ||
1463 | int __user val = 1; | ||
1464 | (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, | ||
1465 | (char __user *)&val, sizeof(val)); | ||
1466 | } | ||
1467 | |||
1468 | static inline void drbd_tcp_uncork(struct socket *sock) | ||
1469 | { | ||
1470 | int __user val = 0; | ||
1471 | (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, | ||
1472 | (char __user *)&val, sizeof(val)); | ||
1473 | } | ||
1474 | |||
1475 | static inline void drbd_tcp_nodelay(struct socket *sock) | ||
1476 | { | ||
1477 | int __user val = 1; | ||
1478 | (void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY, | ||
1479 | (char __user *)&val, sizeof(val)); | ||
1480 | } | ||
1481 | |||
1482 | static inline void drbd_tcp_quickack(struct socket *sock) | ||
1483 | { | ||
1484 | int __user val = 1; | ||
1485 | (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, | ||
1486 | (char __user *)&val, sizeof(val)); | ||
1487 | } | ||
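/*
 * Illustrative sketch, not part of the original header: the cork helpers
 * above are meant to bracket a burst of small sends so the stack can
 * coalesce them into fewer segments.  Using mdev->data.socket as the
 * target here is an assumption for the example.
 *
 *	drbd_tcp_cork(mdev->data.socket);
 *	// ... queue several small packets on the data socket ...
 *	drbd_tcp_uncork(mdev->data.socket);
 */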
1488 | |||
1489 | void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo); | ||
1490 | |||
1491 | /* drbd_proc.c */ | ||
1492 | extern struct proc_dir_entry *drbd_proc; | ||
1493 | extern struct file_operations drbd_proc_fops; | ||
1494 | extern const char *drbd_conn_str(enum drbd_conns s); | ||
1495 | extern const char *drbd_role_str(enum drbd_role s); | ||
1496 | |||
1497 | /* drbd_actlog.c */ | ||
1498 | extern void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector); | ||
1499 | extern void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector); | ||
1500 | extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector); | ||
1501 | extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector); | ||
1502 | extern int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector); | ||
1503 | extern void drbd_rs_cancel_all(struct drbd_conf *mdev); | ||
1504 | extern int drbd_rs_del_all(struct drbd_conf *mdev); | ||
1505 | extern void drbd_rs_failed_io(struct drbd_conf *mdev, | ||
1506 | sector_t sector, int size); | ||
1507 | extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *); | ||
1508 | extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, | ||
1509 | int size, const char *file, const unsigned int line); | ||
1510 | #define drbd_set_in_sync(mdev, sector, size) \ | ||
1511 | __drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__) | ||
1512 | extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, | ||
1513 | int size, const char *file, const unsigned int line); | ||
1514 | #define drbd_set_out_of_sync(mdev, sector, size) \ | ||
1515 | __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) | ||
1516 | extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); | ||
1517 | extern void drbd_al_to_on_disk_bm(struct drbd_conf *mdev); | ||
1518 | extern void drbd_al_shrink(struct drbd_conf *mdev); | ||
1519 | |||
1520 | |||
1521 | /* drbd_nl.c */ | ||
1522 | |||
1523 | void drbd_nl_cleanup(void); | ||
1524 | int __init drbd_nl_init(void); | ||
1525 | void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state); | ||
1526 | void drbd_bcast_sync_progress(struct drbd_conf *mdev); | ||
1527 | void drbd_bcast_ee(struct drbd_conf *mdev, | ||
1528 | const char *reason, const int dgs, | ||
1529 | const char* seen_hash, const char* calc_hash, | ||
1530 | const struct drbd_epoch_entry* e); | ||
1531 | |||
1532 | |||
1533 | /** | ||
1534 | * DOC: DRBD State macros | ||
1535 | * | ||
1536 | * These macros are used to express state changes in easily readable form. | ||
1537 | * | ||
1538 | * The NS macros expand to a mask and a value that can be bit-ored onto the | ||
1539 | * current state once the spinlock (req_lock) has been taken. | ||
1540 | * | ||
1541 | * The _NS macros are used for state functions that get called with the | ||
1542 | * spinlock held. These macros expand directly to the new state value. | ||
1543 | * | ||
1544 | * Besides the basic forms NS() and _NS(), additional _?NS[23] variants are | ||
1545 | * defined to express state changes that affect more than one aspect of the state. | ||
1546 | * | ||
1547 | * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY) | ||
1548 | * means that the network connection was established and that the peer | ||
1549 | * is in secondary role. | ||
1550 | */ | ||
1551 | #define role_MASK R_MASK | ||
1552 | #define peer_MASK R_MASK | ||
1553 | #define disk_MASK D_MASK | ||
1554 | #define pdsk_MASK D_MASK | ||
1555 | #define conn_MASK C_MASK | ||
1556 | #define susp_MASK 1 | ||
1557 | #define user_isp_MASK 1 | ||
1558 | #define aftr_isp_MASK 1 | ||
1559 | |||
1560 | #define NS(T, S) \ | ||
1561 | ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \ | ||
1562 | ({ union drbd_state val; val.i = 0; val.T = (S); val; }) | ||
1563 | #define NS2(T1, S1, T2, S2) \ | ||
1564 | ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ | ||
1565 | mask.T2 = T2##_MASK; mask; }), \ | ||
1566 | ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ | ||
1567 | val.T2 = (S2); val; }) | ||
1568 | #define NS3(T1, S1, T2, S2, T3, S3) \ | ||
1569 | ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ | ||
1570 | mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \ | ||
1571 | ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ | ||
1572 | val.T2 = (S2); val.T3 = (S3); val; }) | ||
1573 | |||
1574 | #define _NS(D, T, S) \ | ||
1575 | D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; }) | ||
1576 | #define _NS2(D, T1, S1, T2, S2) \ | ||
1577 | D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ | ||
1578 | __ns.T2 = (S2); __ns; }) | ||
1579 | #define _NS3(D, T1, S1, T2, S2, T3, S3) \ | ||
1580 | D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ | ||
1581 | __ns.T2 = (S2); __ns.T3 = (S3); __ns; }) | ||
1582 | |||
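/*
 * Illustrative sketch, not part of the original header: NS2() expands to a
 * (mask, val) pair, so it can be passed straight to the state request
 * helpers declared further below; the exact call site is an assumption.
 *
 *	drbd_request_state(mdev, NS2(conn, C_CONNECTED, peer, R_SECONDARY));
 *
 * while _NS() builds a complete new state from the current one, for code
 * that already holds req_lock:
 *
 *	_drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
 */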
1583 | /* | ||
1584 | * inline helper functions | ||
1585 | *************************/ | ||
1586 | |||
1587 | static inline void drbd_state_lock(struct drbd_conf *mdev) | ||
1588 | { | ||
1589 | wait_event(mdev->misc_wait, | ||
1590 | !test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags)); | ||
1591 | } | ||
1592 | |||
1593 | static inline void drbd_state_unlock(struct drbd_conf *mdev) | ||
1594 | { | ||
1595 | clear_bit(CLUSTER_ST_CHANGE, &mdev->flags); | ||
1596 | wake_up(&mdev->misc_wait); | ||
1597 | } | ||
1598 | |||
1599 | static inline int _drbd_set_state(struct drbd_conf *mdev, | ||
1600 | union drbd_state ns, enum chg_state_flags flags, | ||
1601 | struct completion *done) | ||
1602 | { | ||
1603 | int rv; | ||
1604 | |||
1605 | read_lock(&global_state_lock); | ||
1606 | rv = __drbd_set_state(mdev, ns, flags, done); | ||
1607 | read_unlock(&global_state_lock); | ||
1608 | |||
1609 | return rv; | ||
1610 | } | ||
1611 | |||
1612 | /** | ||
1613 | * drbd_request_state() - Request a state change | ||
1614 | * @mdev: DRBD device. | ||
1615 | * @mask: mask of state bits to change. | ||
1616 | * @val: value of new state bits. | ||
1617 | * | ||
1618 | * This is the most graceful way of requesting a state change. It is | ||
1619 | * quite verbose in case the state change is not possible, and all those | ||
1620 | * state changes are globally serialized. | ||
1621 | */ | ||
1622 | static inline int drbd_request_state(struct drbd_conf *mdev, | ||
1623 | union drbd_state mask, | ||
1624 | union drbd_state val) | ||
1625 | { | ||
1626 | return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); | ||
1627 | } | ||
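/*
 * Illustrative sketch (assumed call site, not from this header): a single
 * field change, e.g. requesting a disconnect, reads as
 *
 *	drbd_request_state(mdev, NS(conn, C_DISCONNECTING));
 *
 * and is expected to return only after the globally serialized state
 * change has been attempted.
 */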
1628 | |||
1629 | #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) | ||
1630 | static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where) | ||
1631 | { | ||
1632 | switch (mdev->ldev->dc.on_io_error) { | ||
1633 | case EP_PASS_ON: | ||
1634 | if (!forcedetach) { | ||
1635 | if (printk_ratelimit()) | ||
1636 | dev_err(DEV, "Local IO failed in %s. " | ||
1637 | "Passing error on...\n", where); | ||
1638 | break; | ||
1639 | } | ||
1640 | /* NOTE fall through to detach case if forcedetach set */ | ||
1641 | case EP_DETACH: | ||
1642 | case EP_CALL_HELPER: | ||
1643 | if (mdev->state.disk > D_FAILED) { | ||
1644 | _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); | ||
1645 | dev_err(DEV, "Local IO failed in %s. " | ||
1646 | "Detaching...\n", where); | ||
1647 | } | ||
1648 | break; | ||
1649 | } | ||
1650 | } | ||
1651 | |||
1652 | /** | ||
1653 | * drbd_chk_io_error() - Handle the on_io_error setting, should be called from all io completion handlers | ||
1654 | * @mdev: DRBD device. | ||
1655 | * @error: Error code passed to the IO completion callback | ||
1656 | * @forcedetach: Force detach. I.e. the error happened while accessing the meta data | ||
1657 | * | ||
1658 | * See also drbd_main.c:after_state_ch() if (os.disk > D_FAILED && ns.disk == D_FAILED) | ||
1659 | */ | ||
1660 | #define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__) | ||
1661 | static inline void drbd_chk_io_error_(struct drbd_conf *mdev, | ||
1662 | int error, int forcedetach, const char *where) | ||
1663 | { | ||
1664 | if (error) { | ||
1665 | unsigned long flags; | ||
1666 | spin_lock_irqsave(&mdev->req_lock, flags); | ||
1667 | __drbd_chk_io_error_(mdev, forcedetach, where); | ||
1668 | spin_unlock_irqrestore(&mdev->req_lock, flags); | ||
1669 | } | ||
1670 | } | ||
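/*
 * Illustrative sketch, an assumption and not part of the original header:
 * an IO completion handler that has the owning mdev and the bio's error
 * status would forward it like this; forcedetach stays FALSE for normal
 * data IO and would be TRUE only for meta data accesses.
 *
 *	drbd_chk_io_error(mdev, error, FALSE);
 */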
1671 | |||
1672 | |||
1673 | /** | ||
1674 | * drbd_md_first_sector() - Returns the first sector number of the meta data area | ||
1675 | * @bdev: Meta data block device. | ||
1676 | * | ||
1677 | * BTW, for internal meta data, this happens to be the maximum capacity | ||
1678 | * we could agree upon with our peer node. | ||
1679 | */ | ||
1680 | static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev) | ||
1681 | { | ||
1682 | switch (bdev->dc.meta_dev_idx) { | ||
1683 | case DRBD_MD_INDEX_INTERNAL: | ||
1684 | case DRBD_MD_INDEX_FLEX_INT: | ||
1685 | return bdev->md.md_offset + bdev->md.bm_offset; | ||
1686 | case DRBD_MD_INDEX_FLEX_EXT: | ||
1687 | default: | ||
1688 | return bdev->md.md_offset; | ||
1689 | } | ||
1690 | } | ||
1691 | |||
1692 | /** | ||
1693 | * drbd_md_last_sector() - Return the last sector number of the meta data area | ||
1694 | * @bdev: Meta data block device. | ||
1695 | */ | ||
1696 | static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev) | ||
1697 | { | ||
1698 | switch (bdev->dc.meta_dev_idx) { | ||
1699 | case DRBD_MD_INDEX_INTERNAL: | ||
1700 | case DRBD_MD_INDEX_FLEX_INT: | ||
1701 | return bdev->md.md_offset + MD_AL_OFFSET - 1; | ||
1702 | case DRBD_MD_INDEX_FLEX_EXT: | ||
1703 | default: | ||
1704 | return bdev->md.md_offset + bdev->md.md_size_sect; | ||
1705 | } | ||
1706 | } | ||
1707 | |||
1708 | /* Returns the number of 512 byte sectors of the device */ | ||
1709 | static inline sector_t drbd_get_capacity(struct block_device *bdev) | ||
1710 | { | ||
1711 | /* return bdev ? get_capacity(bdev->bd_disk) : 0; */ | ||
1712 | return bdev ? bdev->bd_inode->i_size >> 9 : 0; | ||
1713 | } | ||
1714 | |||
1715 | /** | ||
1716 | * drbd_get_max_capacity() - Returns the capacity we announce to our peer | ||
1717 | * @bdev: Meta data block device. | ||
1718 | * | ||
1719 | * Returns the capacity we announce to our peer. We clip ourselves at the | ||
1720 | * various MAX_SECTORS, because if we don't, the current implementation will | ||
1721 | * oops sooner or later. | ||
1722 | */ | ||
1723 | static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev) | ||
1724 | { | ||
1725 | sector_t s; | ||
1726 | switch (bdev->dc.meta_dev_idx) { | ||
1727 | case DRBD_MD_INDEX_INTERNAL: | ||
1728 | case DRBD_MD_INDEX_FLEX_INT: | ||
1729 | s = drbd_get_capacity(bdev->backing_bdev) | ||
1730 | ? min_t(sector_t, DRBD_MAX_SECTORS_FLEX, | ||
1731 | drbd_md_first_sector(bdev)) | ||
1732 | : 0; | ||
1733 | break; | ||
1734 | case DRBD_MD_INDEX_FLEX_EXT: | ||
1735 | s = min_t(sector_t, DRBD_MAX_SECTORS_FLEX, | ||
1736 | drbd_get_capacity(bdev->backing_bdev)); | ||
1737 | /* clip at maximum size the meta device can support */ | ||
1738 | s = min_t(sector_t, s, | ||
1739 | BM_EXT_TO_SECT(bdev->md.md_size_sect | ||
1740 | - bdev->md.bm_offset)); | ||
1741 | break; | ||
1742 | default: | ||
1743 | s = min_t(sector_t, DRBD_MAX_SECTORS, | ||
1744 | drbd_get_capacity(bdev->backing_bdev)); | ||
1745 | } | ||
1746 | return s; | ||
1747 | } | ||
1748 | |||
1749 | /** | ||
1750 | * drbd_md_ss__() - Return the sector number of our meta data super block | ||
1751 | * @mdev: DRBD device. | ||
1752 | * @bdev: Meta data block device. | ||
1753 | */ | ||
1754 | static inline sector_t drbd_md_ss__(struct drbd_conf *mdev, | ||
1755 | struct drbd_backing_dev *bdev) | ||
1756 | { | ||
1757 | switch (bdev->dc.meta_dev_idx) { | ||
1758 | default: /* external, some index */ | ||
1759 | return MD_RESERVED_SECT * bdev->dc.meta_dev_idx; | ||
1760 | case DRBD_MD_INDEX_INTERNAL: | ||
1761 | /* with drbd08, internal meta data is always "flexible" */ | ||
1762 | case DRBD_MD_INDEX_FLEX_INT: | ||
1763 | /* sizeof(struct md_on_disk_07) == 4k | ||
1764 | * position: last 4k aligned block of 4k size */ | ||
1765 | if (!bdev->backing_bdev) { | ||
1766 | if (__ratelimit(&drbd_ratelimit_state)) { | ||
1767 | dev_err(DEV, "bdev->backing_bdev==NULL\n"); | ||
1768 | dump_stack(); | ||
1769 | } | ||
1770 | return 0; | ||
1771 | } | ||
1772 | return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL) | ||
1773 | - MD_AL_OFFSET; | ||
1774 | case DRBD_MD_INDEX_FLEX_EXT: | ||
1775 | return 0; | ||
1776 | } | ||
1777 | } | ||
1778 | |||
1779 | static inline void | ||
1780 | _drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) | ||
1781 | { | ||
1782 | list_add_tail(&w->list, &q->q); | ||
1783 | up(&q->s); | ||
1784 | } | ||
1785 | |||
1786 | static inline void | ||
1787 | drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w) | ||
1788 | { | ||
1789 | unsigned long flags; | ||
1790 | spin_lock_irqsave(&q->q_lock, flags); | ||
1791 | list_add(&w->list, &q->q); | ||
1792 | up(&q->s); /* within the spinlock, | ||
1793 | see comment near end of drbd_worker() */ | ||
1794 | spin_unlock_irqrestore(&q->q_lock, flags); | ||
1795 | } | ||
1796 | |||
1797 | static inline void | ||
1798 | drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) | ||
1799 | { | ||
1800 | unsigned long flags; | ||
1801 | spin_lock_irqsave(&q->q_lock, flags); | ||
1802 | list_add_tail(&w->list, &q->q); | ||
1803 | up(&q->s); /* within the spinlock, | ||
1804 | see comment near end of drbd_worker() */ | ||
1805 | spin_unlock_irqrestore(&q->q_lock, flags); | ||
1806 | } | ||
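/*
 * Illustrative sketch (assumption): queueing one of the worker callbacks
 * declared above on the per-device work queue.  The 'cb' member name of
 * struct drbd_work is assumed from its use elsewhere in the driver.
 *
 *	w->cb = w_send_write_hint;
 *	drbd_queue_work(&mdev->data.work, w);
 */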
1807 | |||
1808 | static inline void wake_asender(struct drbd_conf *mdev) | ||
1809 | { | ||
1810 | if (test_bit(SIGNAL_ASENDER, &mdev->flags)) | ||
1811 | force_sig(DRBD_SIG, mdev->asender.task); | ||
1812 | } | ||
1813 | |||
1814 | static inline void request_ping(struct drbd_conf *mdev) | ||
1815 | { | ||
1816 | set_bit(SEND_PING, &mdev->flags); | ||
1817 | wake_asender(mdev); | ||
1818 | } | ||
1819 | |||
1820 | static inline int drbd_send_short_cmd(struct drbd_conf *mdev, | ||
1821 | enum drbd_packets cmd) | ||
1822 | { | ||
1823 | struct p_header h; | ||
1824 | return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); | ||
1825 | } | ||
1826 | |||
1827 | static inline int drbd_send_ping(struct drbd_conf *mdev) | ||
1828 | { | ||
1829 | struct p_header h; | ||
1830 | return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h)); | ||
1831 | } | ||
1832 | |||
1833 | static inline int drbd_send_ping_ack(struct drbd_conf *mdev) | ||
1834 | { | ||
1835 | struct p_header h; | ||
1836 | return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); | ||
1837 | } | ||
1838 | |||
1839 | static inline void drbd_thread_stop(struct drbd_thread *thi) | ||
1840 | { | ||
1841 | _drbd_thread_stop(thi, FALSE, TRUE); | ||
1842 | } | ||
1843 | |||
1844 | static inline void drbd_thread_stop_nowait(struct drbd_thread *thi) | ||
1845 | { | ||
1846 | _drbd_thread_stop(thi, FALSE, FALSE); | ||
1847 | } | ||
1848 | |||
1849 | static inline void drbd_thread_restart_nowait(struct drbd_thread *thi) | ||
1850 | { | ||
1851 | _drbd_thread_stop(thi, TRUE, FALSE); | ||
1852 | } | ||
1853 | |||
1854 | /* counts how many answer packets we expect from our peer, | ||
1855 | * for either explicit application requests, | ||
1856 | * or implicit barrier packets as necessary. | ||
1857 | * increased: | ||
1858 | * w_send_barrier | ||
1859 | * _req_mod(req, queue_for_net_write or queue_for_net_read); | ||
1860 | * it is much easier and equally valid to count what we queue for the | ||
1861 | * worker, even before it actually was queued or sent. | ||
1862 | * (drbd_make_request_common; recovery path on read io-error) | ||
1863 | * decreased: | ||
1864 | * got_BarrierAck (respective tl_clear, tl_clear_barrier) | ||
1865 | * _req_mod(req, data_received) | ||
1866 | * [from receive_DataReply] | ||
1867 | * _req_mod(req, write_acked_by_peer or recv_acked_by_peer or neg_acked) | ||
1868 | * [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)] | ||
1869 | * for some reason it is NOT decreased in got_NegAck, | ||
1870 | * but in the resulting cleanup code from report_params. | ||
1871 | * we should try to remember the reason for that... | ||
1872 | * _req_mod(req, send_failed or send_canceled) | ||
1873 | * _req_mod(req, connection_lost_while_pending) | ||
1874 | * [from tl_clear_barrier] | ||
1875 | */ | ||
1876 | static inline void inc_ap_pending(struct drbd_conf *mdev) | ||
1877 | { | ||
1878 | atomic_inc(&mdev->ap_pending_cnt); | ||
1879 | } | ||
1880 | |||
1881 | #define ERR_IF_CNT_IS_NEGATIVE(which) \ | ||
1882 | if (atomic_read(&mdev->which) < 0) \ | ||
1883 | dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n", \ | ||
1884 | __func__ , __LINE__ , \ | ||
1885 | atomic_read(&mdev->which)) | ||
1886 | |||
1887 | #define dec_ap_pending(mdev) do { \ | ||
1888 | typecheck(struct drbd_conf *, mdev); \ | ||
1889 | if (atomic_dec_and_test(&mdev->ap_pending_cnt)) \ | ||
1890 | wake_up(&mdev->misc_wait); \ | ||
1891 | ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt); } while (0) | ||
1892 | |||
1893 | /* counts how many resync-related answers we still expect from the peer | ||
1894 | * increase decrease | ||
1895 | * C_SYNC_TARGET sends P_RS_DATA_REQUEST (and expects P_RS_DATA_REPLY) | ||
1896 | * C_SYNC_SOURCE sends P_RS_DATA_REPLY (and expects P_WRITE_ACK with ID_SYNCER) | ||
1897 | * (or P_NEG_ACK with ID_SYNCER) | ||
1898 | */ | ||
1899 | static inline void inc_rs_pending(struct drbd_conf *mdev) | ||
1900 | { | ||
1901 | atomic_inc(&mdev->rs_pending_cnt); | ||
1902 | } | ||
1903 | |||
1904 | #define dec_rs_pending(mdev) do { \ | ||
1905 | typecheck(struct drbd_conf *, mdev); \ | ||
1906 | atomic_dec(&mdev->rs_pending_cnt); \ | ||
1907 | ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt); } while (0) | ||
1908 | |||
1909 | /* counts how many answers we still need to send to the peer. | ||
1910 | * increased on | ||
1911 | * receive_Data unless protocol A; | ||
1912 | * we need to send a P_RECV_ACK (proto B) | ||
1913 | * or P_WRITE_ACK (proto C) | ||
1914 | * receive_RSDataReply (recv_resync_read) we need to send a P_WRITE_ACK | ||
1915 | * receive_DataRequest (receive_RSDataRequest) we need to send back P_DATA | ||
1916 | * receive_Barrier_* we need to send a P_BARRIER_ACK | ||
1917 | */ | ||
1918 | static inline void inc_unacked(struct drbd_conf *mdev) | ||
1919 | { | ||
1920 | atomic_inc(&mdev->unacked_cnt); | ||
1921 | } | ||
1922 | |||
1923 | #define dec_unacked(mdev) do { \ | ||
1924 | typecheck(struct drbd_conf *, mdev); \ | ||
1925 | atomic_dec(&mdev->unacked_cnt); \ | ||
1926 | ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) | ||
1927 | |||
1928 | #define sub_unacked(mdev, n) do { \ | ||
1929 | typecheck(struct drbd_conf *, mdev); \ | ||
1930 | atomic_sub(n, &mdev->unacked_cnt); \ | ||
1931 | ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) | ||
1932 | |||
1933 | |||
1934 | static inline void put_net_conf(struct drbd_conf *mdev) | ||
1935 | { | ||
1936 | if (atomic_dec_and_test(&mdev->net_cnt)) | ||
1937 | wake_up(&mdev->misc_wait); | ||
1938 | } | ||
1939 | |||
1940 | /** | ||
1941 | * get_net_conf() - Increase ref count on mdev->net_conf; Returns 0 if nothing there | ||
1942 | * @mdev: DRBD device. | ||
1943 | * | ||
1944 | * You have to call put_net_conf() when finished working with mdev->net_conf. | ||
1945 | */ | ||
1946 | static inline int get_net_conf(struct drbd_conf *mdev) | ||
1947 | { | ||
1948 | int have_net_conf; | ||
1949 | |||
1950 | atomic_inc(&mdev->net_cnt); | ||
1951 | have_net_conf = mdev->state.conn >= C_UNCONNECTED; | ||
1952 | if (!have_net_conf) | ||
1953 | put_net_conf(mdev); | ||
1954 | return have_net_conf; | ||
1955 | } | ||
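/*
 * Illustrative sketch, not part of the original header: the reference
 * pattern the comment above describes; drbd_get_max_buffers() further
 * below follows exactly this shape.
 *
 *	if (get_net_conf(mdev)) {
 *		int mxb = mdev->net_conf->max_buffers;
 *		// ... use the value ...
 *		put_net_conf(mdev);
 *	}
 */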
1956 | |||
1957 | /** | ||
1958 | * get_ldev() - Increase the ref count on mdev->ldev. Returns 0 if there is no ldev | ||
1959 | * @M: DRBD device. | ||
1960 | * | ||
1961 | * You have to call put_ldev() when finished working with mdev->ldev. | ||
1962 | */ | ||
1963 | #define get_ldev(M) __cond_lock(local, _get_ldev_if_state(M,D_INCONSISTENT)) | ||
1964 | #define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS)) | ||
1965 | |||
1966 | static inline void put_ldev(struct drbd_conf *mdev) | ||
1967 | { | ||
1968 | __release(local); | ||
1969 | if (atomic_dec_and_test(&mdev->local_cnt)) | ||
1970 | wake_up(&mdev->misc_wait); | ||
1971 | D_ASSERT(atomic_read(&mdev->local_cnt) >= 0); | ||
1972 | } | ||
1973 | |||
1974 | #ifndef __CHECKER__ | ||
1975 | static inline int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) | ||
1976 | { | ||
1977 | int io_allowed; | ||
1978 | |||
1979 | atomic_inc(&mdev->local_cnt); | ||
1980 | io_allowed = (mdev->state.disk >= mins); | ||
1981 | if (!io_allowed) | ||
1982 | put_ldev(mdev); | ||
1983 | return io_allowed; | ||
1984 | } | ||
1985 | #else | ||
1986 | extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins); | ||
1987 | #endif | ||
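/*
 * Illustrative sketch (assumption, mirroring drbd_kick_lo() further below):
 * every successful get_ldev()/get_ldev_if_state() must be paired with
 * put_ldev() once mdev->ldev is no longer dereferenced.
 *
 *	if (get_ldev_if_state(mdev, D_UP_TO_DATE)) {
 *		// ... safe to use mdev->ldev here ...
 *		put_ldev(mdev);
 *	}
 */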
1988 | |||
1989 | /* you must have a "get_ldev" reference */ | ||
1990 | static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, | ||
1991 | unsigned long *bits_left, unsigned int *per_mil_done) | ||
1992 | { | ||
1993 | /* | ||
1994 | * this is to break it at compile time when we change that | ||
1995 | * (we may feel 4TB maximum storage per drbd is not enough) | ||
1996 | */ | ||
1997 | typecheck(unsigned long, mdev->rs_total); | ||
1998 | |||
1999 | /* note: both rs_total and rs_left are in bits, i.e. in | ||
2000 | * units of BM_BLOCK_SIZE. | ||
2001 | * for the percentage, we don't care. */ | ||
2002 | |||
2003 | *bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; | ||
2004 | /* >> 10 to prevent overflow, | ||
2005 | * +1 to prevent division by zero */ | ||
2006 | if (*bits_left > mdev->rs_total) { | ||
2007 | /* doh. maybe a logic bug somewhere. | ||
2008 | * may also be just a race condition | ||
2009 | * between this and a disconnect during sync. | ||
2010 | * for now, just prevent in-kernel buffer overflow. | ||
2011 | */ | ||
2012 | smp_rmb(); | ||
2013 | dev_warn(DEV, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n", | ||
2014 | drbd_conn_str(mdev->state.conn), | ||
2015 | *bits_left, mdev->rs_total, mdev->rs_failed); | ||
2016 | *per_mil_done = 0; | ||
2017 | } else { | ||
2018 | /* make sure the calculation happens in long context */ | ||
2019 | unsigned long tmp = 1000UL - | ||
2020 | (*bits_left >> 10)*1000UL | ||
2021 | / ((mdev->rs_total >> 10) + 1UL); | ||
2022 | *per_mil_done = tmp; | ||
2023 | } | ||
2024 | } | ||
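/*
 * Worked example (illustrative numbers): with rs_total = 2^20 bits and
 * bits_left = 2^19 bits, the computation above yields
 * 1000 - (512 * 1000) / (1024 + 1) = 1000 - 499 = 501, i.e. roughly
 * 50.1% done; the >> 10 keeps the multiplication from overflowing and
 * the +1 avoids a division by zero for tiny devices.
 */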
2025 | |||
2026 | |||
2027 | /* this throttles on-the-fly application requests | ||
2028 | * according to max_buffers settings; | ||
2029 | * maybe re-implement using semaphores? */ | ||
2030 | static inline int drbd_get_max_buffers(struct drbd_conf *mdev) | ||
2031 | { | ||
2032 | int mxb = 1000000; /* arbitrary limit on open requests */ | ||
2033 | if (get_net_conf(mdev)) { | ||
2034 | mxb = mdev->net_conf->max_buffers; | ||
2035 | put_net_conf(mdev); | ||
2036 | } | ||
2037 | return mxb; | ||
2038 | } | ||
2039 | |||
2040 | static inline int drbd_state_is_stable(union drbd_state s) | ||
2041 | { | ||
2042 | |||
2043 | /* DO NOT add a default clause, we want the compiler to warn us | ||
2044 | * about any newly introduced state we may have forgotten to add here */ | ||
2045 | |||
2046 | switch ((enum drbd_conns)s.conn) { | ||
2047 | /* new io only accepted when there is no connection, ... */ | ||
2048 | case C_STANDALONE: | ||
2049 | case C_WF_CONNECTION: | ||
2050 | /* ... or there is a well established connection. */ | ||
2051 | case C_CONNECTED: | ||
2052 | case C_SYNC_SOURCE: | ||
2053 | case C_SYNC_TARGET: | ||
2054 | case C_VERIFY_S: | ||
2055 | case C_VERIFY_T: | ||
2056 | case C_PAUSED_SYNC_S: | ||
2057 | case C_PAUSED_SYNC_T: | ||
2058 | /* maybe stable, look at the disk state */ | ||
2059 | break; | ||
2060 | |||
2061 | /* no new io accepted during transitional states | ||
2062 | * like handshake or teardown */ | ||
2063 | case C_DISCONNECTING: | ||
2064 | case C_UNCONNECTED: | ||
2065 | case C_TIMEOUT: | ||
2066 | case C_BROKEN_PIPE: | ||
2067 | case C_NETWORK_FAILURE: | ||
2068 | case C_PROTOCOL_ERROR: | ||
2069 | case C_TEAR_DOWN: | ||
2070 | case C_WF_REPORT_PARAMS: | ||
2071 | case C_STARTING_SYNC_S: | ||
2072 | case C_STARTING_SYNC_T: | ||
2073 | case C_WF_BITMAP_S: | ||
2074 | case C_WF_BITMAP_T: | ||
2075 | case C_WF_SYNC_UUID: | ||
2076 | case C_MASK: | ||
2077 | /* not "stable" */ | ||
2078 | return 0; | ||
2079 | } | ||
2080 | |||
2081 | switch ((enum drbd_disk_state)s.disk) { | ||
2082 | case D_DISKLESS: | ||
2083 | case D_INCONSISTENT: | ||
2084 | case D_OUTDATED: | ||
2085 | case D_CONSISTENT: | ||
2086 | case D_UP_TO_DATE: | ||
2087 | /* disk state is stable as well. */ | ||
2088 | break; | ||
2089 | |||
2090 | /* no new io accepted during transitional states */ | ||
2091 | case D_ATTACHING: | ||
2092 | case D_FAILED: | ||
2093 | case D_NEGOTIATING: | ||
2094 | case D_UNKNOWN: | ||
2095 | case D_MASK: | ||
2096 | /* not "stable" */ | ||
2097 | return 0; | ||
2098 | } | ||
2099 | |||
2100 | return 1; | ||
2101 | } | ||
2102 | |||
2103 | static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) | ||
2104 | { | ||
2105 | int mxb = drbd_get_max_buffers(mdev); | ||
2106 | |||
2107 | if (mdev->state.susp) | ||
2108 | return 0; | ||
2109 | if (test_bit(SUSPEND_IO, &mdev->flags)) | ||
2110 | return 0; | ||
2111 | |||
2112 | /* to avoid potential deadlock or bitmap corruption, | ||
2113 | * in various places, we only allow new application io | ||
2114 | * to start during "stable" states. */ | ||
2115 | |||
2116 | /* no new io accepted when attaching or detaching the disk */ | ||
2117 | if (!drbd_state_is_stable(mdev->state)) | ||
2118 | return 0; | ||
2119 | |||
2120 | /* since some older kernels don't have atomic_add_unless, | ||
2121 | * and we are within the spinlock anyway, we have this workaround. | ||
2122 | if (atomic_read(&mdev->ap_bio_cnt) > mxb) | ||
2123 | return 0; | ||
2124 | if (test_bit(BITMAP_IO, &mdev->flags)) | ||
2125 | return 0; | ||
2126 | return 1; | ||
2127 | } | ||
2128 | |||
2129 | /* I'd like to use wait_event_lock_irq, | ||
2130 | * but I'm not sure when it got introduced, | ||
2131 | * and not sure whether it takes 3 or 4 arguments */ | ||
2132 | static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two) | ||
2133 | { | ||
2134 | /* compare with after_state_ch, | ||
2135 | * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */ | ||
2136 | DEFINE_WAIT(wait); | ||
2137 | |||
2138 | /* we wait here | ||
2139 | * as long as the device is suspended, | ||
2140 | * until the bitmap is no longer on the fly during connection handshake, | ||
2141 | * and as long as we would exceed the max_buffer limit. | ||
2142 | * | ||
2143 | * to avoid races with the reconnect code, | ||
2144 | * we need to atomic_inc within the spinlock. */ | ||
2145 | |||
2146 | spin_lock_irq(&mdev->req_lock); | ||
2147 | while (!__inc_ap_bio_cond(mdev)) { | ||
2148 | prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
2149 | spin_unlock_irq(&mdev->req_lock); | ||
2150 | schedule(); | ||
2151 | finish_wait(&mdev->misc_wait, &wait); | ||
2152 | spin_lock_irq(&mdev->req_lock); | ||
2153 | } | ||
2154 | atomic_add(one_or_two, &mdev->ap_bio_cnt); | ||
2155 | spin_unlock_irq(&mdev->req_lock); | ||
2156 | } | ||
2157 | |||
2158 | static inline void dec_ap_bio(struct drbd_conf *mdev) | ||
2159 | { | ||
2160 | int mxb = drbd_get_max_buffers(mdev); | ||
2161 | int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt); | ||
2162 | |||
2163 | D_ASSERT(ap_bio >= 0); | ||
2164 | /* this currently does wake_up for every dec_ap_bio! | ||
2165 | * maybe rather introduce some type of hysteresis? | ||
2166 | * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */ | ||
2167 | if (ap_bio < mxb) | ||
2168 | wake_up(&mdev->misc_wait); | ||
2169 | if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { | ||
2170 | if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) | ||
2171 | drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); | ||
2172 | } | ||
2173 | } | ||
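/*
 * Illustrative sketch (assumed request-path usage, not from this header):
 * application IO is bracketed by these counters, with the argument of
 * inc_ap_bio() counting the bios submitted for one request.
 *
 *	inc_ap_bio(mdev, 1);
 *	// ... build and submit the drbd_request for this bio ...
 *	// and once it is completed towards the upper layers:
 *	dec_ap_bio(mdev);
 */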
2174 | |||
2175 | static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) | ||
2176 | { | ||
2177 | mdev->ed_uuid = val; | ||
2178 | } | ||
2179 | |||
2180 | static inline int seq_cmp(u32 a, u32 b) | ||
2181 | { | ||
2182 | /* we assume wrap around at 32bit. | ||
2183 | * for wrap around at 24bit (old atomic_t), | ||
2184 | * we'd have to | ||
2185 | * a <<= 8; b <<= 8; | ||
2186 | */ | ||
2187 | return (s32)(a) - (s32)(b); | ||
2188 | } | ||
2189 | #define seq_lt(a, b) (seq_cmp((a), (b)) < 0) | ||
2190 | #define seq_gt(a, b) (seq_cmp((a), (b)) > 0) | ||
2191 | #define seq_ge(a, b) (seq_cmp((a), (b)) >= 0) | ||
2192 | #define seq_le(a, b) (seq_cmp((a), (b)) <= 0) | ||
2193 | /* CAUTION: please no side effects in arguments! */ | ||
2194 | #define seq_max(a, b) ((u32)(seq_gt((a), (b)) ? (a) : (b))) | ||
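/*
 * Worked example (illustrative): right after the 32bit counter wraps,
 * seq_cmp(1, 0xffffffff) == (s32)1 - (s32)0xffffffff == 2 > 0, so
 * sequence 1 is treated as newer than 0xffffffff and seq_max() picks 1,
 * which is the intended wrap-around behaviour.
 */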
2195 | |||
2196 | static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq) | ||
2197 | { | ||
2198 | unsigned int m; | ||
2199 | spin_lock(&mdev->peer_seq_lock); | ||
2200 | m = seq_max(mdev->peer_seq, new_seq); | ||
2201 | mdev->peer_seq = m; | ||
2202 | spin_unlock(&mdev->peer_seq_lock); | ||
2203 | if (m == new_seq) | ||
2204 | wake_up(&mdev->seq_wait); | ||
2205 | } | ||
2206 | |||
2207 | static inline void drbd_update_congested(struct drbd_conf *mdev) | ||
2208 | { | ||
2209 | struct sock *sk = mdev->data.socket->sk; | ||
2210 | if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) | ||
2211 | set_bit(NET_CONGESTED, &mdev->flags); | ||
2212 | } | ||
2213 | |||
2214 | static inline int drbd_queue_order_type(struct drbd_conf *mdev) | ||
2215 | { | ||
2216 | /* sorry, we currently have no working implementation | ||
2217 | * of distributed TCQ stuff */ | ||
2218 | #ifndef QUEUE_ORDERED_NONE | ||
2219 | #define QUEUE_ORDERED_NONE 0 | ||
2220 | #endif | ||
2221 | return QUEUE_ORDERED_NONE; | ||
2222 | } | ||
2223 | |||
2224 | static inline void drbd_blk_run_queue(struct request_queue *q) | ||
2225 | { | ||
2226 | if (q && q->unplug_fn) | ||
2227 | q->unplug_fn(q); | ||
2228 | } | ||
2229 | |||
2230 | static inline void drbd_kick_lo(struct drbd_conf *mdev) | ||
2231 | { | ||
2232 | if (get_ldev(mdev)) { | ||
2233 | drbd_blk_run_queue(bdev_get_queue(mdev->ldev->backing_bdev)); | ||
2234 | put_ldev(mdev); | ||
2235 | } | ||
2236 | } | ||
2237 | |||
2238 | static inline void drbd_md_flush(struct drbd_conf *mdev) | ||
2239 | { | ||
2240 | int r; | ||
2241 | |||
2242 | if (test_bit(MD_NO_BARRIER, &mdev->flags)) | ||
2243 | return; | ||
2244 | |||
2245 | r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL); | ||
2246 | if (r) { | ||
2247 | set_bit(MD_NO_BARRIER, &mdev->flags); | ||
2248 | dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); | ||
2249 | } | ||
2250 | } | ||
2251 | |||
2252 | #endif | ||