Diffstat (limited to 'drivers/block/drbd/drbd_int.h')
 -rw-r--r--  drivers/block/drbd/drbd_int.h | 1398
 1 file changed, 601 insertions(+), 797 deletions(-)
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 277c69c9465b..ef72a72814c7 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -39,9 +39,13 @@
 #include <linux/major.h>
 #include <linux/blkdev.h>
 #include <linux/genhd.h>
+#include <linux/idr.h>
 #include <net/tcp.h>
 #include <linux/lru_cache.h>
 #include <linux/prefetch.h>
+#include <linux/drbd_genl_api.h>
+#include <linux/drbd.h>
+#include "drbd_state.h"
 
 #ifdef __CHECKER__
 # define __protected_by(x)       __attribute__((require_context(x,1,999,"rdwr")))
@@ -61,7 +65,6 @@
 extern unsigned int minor_count;
 extern bool disable_sendpage;
 extern bool allow_oos;
-extern unsigned int cn_idx;
 
 #ifdef CONFIG_DRBD_FAULT_INJECTION
 extern int enable_faults;
@@ -86,34 +89,44 @@ extern char usermode_helper[];
  */
 #define DRBD_SIGKILL SIGHUP
 
-/* All EEs on the free list should have ID_VACANT (== 0)
- * freshly allocated EEs get !ID_VACANT (== 1)
- * so if it says "cannot dereference null pointer at address 0x00000001",
- * it is most likely one of these :( */
-
 #define ID_IN_SYNC      (4711ULL)
 #define ID_OUT_OF_SYNC  (4712ULL)
-
 #define ID_SYNCER (-1ULL)
-#define ID_VACANT 0
-#define is_syncer_block_id(id) ((id) == ID_SYNCER)
+
 #define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL)
 
 struct drbd_conf;
+struct drbd_tconn;
 
 
 /* to shorten dev_warn(DEV, "msg"); and relatives statements */
 #define DEV (disk_to_dev(mdev->vdisk))
 
+#define conn_printk(LEVEL, TCONN, FMT, ARGS...) \
+	printk(LEVEL "d-con %s: " FMT, TCONN->name , ## ARGS)
+#define conn_alert(TCONN, FMT, ARGS...)  conn_printk(KERN_ALERT, TCONN, FMT, ## ARGS)
+#define conn_crit(TCONN, FMT, ARGS...)   conn_printk(KERN_CRIT, TCONN, FMT, ## ARGS)
+#define conn_err(TCONN, FMT, ARGS...)    conn_printk(KERN_ERR, TCONN, FMT, ## ARGS)
+#define conn_warn(TCONN, FMT, ARGS...)   conn_printk(KERN_WARNING, TCONN, FMT, ## ARGS)
+#define conn_notice(TCONN, FMT, ARGS...) conn_printk(KERN_NOTICE, TCONN, FMT, ## ARGS)
+#define conn_info(TCONN, FMT, ARGS...)   conn_printk(KERN_INFO, TCONN, FMT, ## ARGS)
+#define conn_dbg(TCONN, FMT, ARGS...)    conn_printk(KERN_DEBUG, TCONN, FMT, ## ARGS)
+
 #define D_ASSERT(exp)	if (!(exp)) \
	 dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__)
 
-#define ERR_IF(exp) if (({						\
-	int _b = (exp) != 0;						\
-	if (_b) dev_err(DEV, "ASSERT FAILED: %s: (%s) in %s:%d\n",	\
-			__func__, #exp, __FILE__, __LINE__);		\
-	_b;								\
-	}))
+/**
+ * expect  -  Make an assertion
+ *
+ * Unlike the assert macro, this macro returns a boolean result.
+ */
+#define expect(exp) ({							\
+		bool _bool = (exp);					\
+		if (!_bool)						\
+			dev_err(DEV, "ASSERTION %s FAILED in %s\n",	\
+				#exp, __func__);			\
+		_bool;							\
+	})
 
 /* Defines to control fault insertion */
 enum {
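
Unlike the removed ERR_IF(), the new expect() macro yields a usable boolean, and the conn_*() macros above give connection-scoped logging prefixed with the resource name. A short illustration of how call sites read (the function below is hypothetical, not part of this header):

/* Hypothetical call site, for illustration only. */
static int check_size(struct drbd_conf *mdev, unsigned int size)
{
	if (!expect(size > 0))	/* logs: ASSERTION size > 0 FAILED in check_size */
		return -EINVAL;	/* the caller can recover instead of oopsing */

	conn_info(mdev->tconn, "size %u ok\n", size); /* log line prefixed "d-con <name>:" */
	return 0;
}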
@@ -150,15 +163,12 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) {
 /* usual integer division */
 #define div_floor(A, B) ((A)/(B))
 
-/* drbd_meta-data.c (still in drbd_main.c) */
-/* 4th incarnation of the disk layout. */
-#define DRBD_MD_MAGIC (DRBD_MAGIC+4)
-
-extern struct drbd_conf **minor_table;
 extern struct ratelimit_state drbd_ratelimit_state;
+extern struct idr minors; /* RCU, updates: genl_lock() */
+extern struct list_head drbd_tconns; /* RCU, updates: genl_lock() */
 
 /* on the wire */
-enum drbd_packets {
+enum drbd_packet {
 	/* receiver (data socket) */
 	P_DATA		      = 0x00,
 	P_DATA_REPLY	      = 0x01, /* Response to P_DATA_REQUEST */
@@ -186,7 +196,7 @@ enum drbd_packets {
 	P_RECV_ACK	      = 0x15, /* Used in protocol B */
 	P_WRITE_ACK	      = 0x16, /* Used in protocol C */
 	P_RS_WRITE_ACK	      = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */
-	P_DISCARD_ACK	      = 0x18, /* Used in proto C, two-primaries conflict detection */
+	P_SUPERSEDED	      = 0x18, /* Used in proto C, two-primaries conflict detection */
 	P_NEG_ACK	      = 0x19, /* Sent if local disk is unusable */
 	P_NEG_DREPLY	      = 0x1a, /* Local disk is broken... */
 	P_NEG_RS_DREPLY	      = 0x1b, /* Local disk is broken... */
@@ -207,77 +217,23 @@ enum drbd_packets {
 	P_DELAY_PROBE         = 0x27, /* is used on BOTH sockets */
 	P_OUT_OF_SYNC         = 0x28, /* Mark as out of sync (Outrunning), data socket */
 	P_RS_CANCEL           = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */
+	P_CONN_ST_CHG_REQ     = 0x2a, /* data sock: Connection wide state request */
+	P_CONN_ST_CHG_REPLY   = 0x2b, /* meta sock: Connection side state req reply */
+	P_RETRY_WRITE	      = 0x2c, /* Protocol C: retry conflicting write request */
+	P_PROTOCOL_UPDATE     = 0x2d, /* data sock: is used in established connections */
 
-	P_MAX_CMD	      = 0x2A,
 	P_MAY_IGNORE	      = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
 	P_MAX_OPT_CMD	      = 0x101,
 
 	/* special command ids for handshake */
 
-	P_HAND_SHAKE_M	      = 0xfff1, /* First Packet on the MetaSock */
-	P_HAND_SHAKE_S	      = 0xfff2, /* First Packet on the Socket */
+	P_INITIAL_META	      = 0xfff1, /* First Packet on the MetaSock */
+	P_INITIAL_DATA	      = 0xfff2, /* First Packet on the Socket */
 
-	P_HAND_SHAKE	      = 0xfffe	/* FIXED for the next century! */
+	P_CONNECTION_FEATURES = 0xfffe	/* FIXED for the next century! */
 };
 
-static inline const char *cmdname(enum drbd_packets cmd)
-{
-	/* THINK may need to become several global tables
-	 * when we want to support more than
-	 * one PRO_VERSION */
-	static const char *cmdnames[] = {
-		[P_DATA]	        = "Data",
-		[P_DATA_REPLY]	        = "DataReply",
-		[P_RS_DATA_REPLY]	= "RSDataReply",
-		[P_BARRIER]	        = "Barrier",
-		[P_BITMAP]	        = "ReportBitMap",
-		[P_BECOME_SYNC_TARGET]  = "BecomeSyncTarget",
-		[P_BECOME_SYNC_SOURCE]  = "BecomeSyncSource",
-		[P_UNPLUG_REMOTE]	= "UnplugRemote",
-		[P_DATA_REQUEST]	= "DataRequest",
-		[P_RS_DATA_REQUEST]     = "RSDataRequest",
-		[P_SYNC_PARAM]	        = "SyncParam",
-		[P_SYNC_PARAM89]	= "SyncParam89",
-		[P_PROTOCOL]            = "ReportProtocol",
-		[P_UUIDS]	        = "ReportUUIDs",
-		[P_SIZES]	        = "ReportSizes",
-		[P_STATE]	        = "ReportState",
-		[P_SYNC_UUID]           = "ReportSyncUUID",
-		[P_AUTH_CHALLENGE]      = "AuthChallenge",
-		[P_AUTH_RESPONSE]	= "AuthResponse",
-		[P_PING]		= "Ping",
-		[P_PING_ACK]	        = "PingAck",
-		[P_RECV_ACK]	        = "RecvAck",
-		[P_WRITE_ACK]	        = "WriteAck",
-		[P_RS_WRITE_ACK]	= "RSWriteAck",
-		[P_DISCARD_ACK]	        = "DiscardAck",
-		[P_NEG_ACK]	        = "NegAck",
-		[P_NEG_DREPLY]	        = "NegDReply",
-		[P_NEG_RS_DREPLY]	= "NegRSDReply",
-		[P_BARRIER_ACK]	        = "BarrierAck",
-		[P_STATE_CHG_REQ]       = "StateChgRequest",
-		[P_STATE_CHG_REPLY]     = "StateChgReply",
-		[P_OV_REQUEST]          = "OVRequest",
-		[P_OV_REPLY]            = "OVReply",
-		[P_OV_RESULT]           = "OVResult",
-		[P_CSUM_RS_REQUEST]     = "CsumRSRequest",
-		[P_RS_IS_IN_SYNC]	= "CsumRSIsInSync",
-		[P_COMPRESSED_BITMAP]   = "CBitmap",
-		[P_DELAY_PROBE]         = "DelayProbe",
-		[P_OUT_OF_SYNC]		= "OutOfSync",
-		[P_MAX_CMD]	        = NULL,
-	};
-
-	if (cmd == P_HAND_SHAKE_M)
-		return "HandShakeM";
-	if (cmd == P_HAND_SHAKE_S)
-		return "HandShakeS";
-	if (cmd == P_HAND_SHAKE)
-		return "HandShake";
-	if (cmd >= P_MAX_CMD)
-		return "Unknown";
-	return cmdnames[cmd];
-}
+extern const char *cmdname(enum drbd_packet cmd);
 
 /* for sending/receiving the bitmap,
  * possibly in some encoding scheme */
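
cmdname() moves out of line; its table-driven body now lives in a .c file. A minimal sketch of what such an implementation looks like after this change (placement and the exact name strings are assumptions, not taken from this diff):

/* Sketch of a table-driven cmdname(), assumed to live in drbd_main.c. */
const char *cmdname(enum drbd_packet cmd)
{
	static const char *cmdnames[] = {
		[P_DATA]	= "Data",
		[P_SUPERSEDED]	= "Superseded",
		/* ... one entry per low-numbered enum drbd_packet value ... */
	};

	/* the 0xfff* handshake ids are handled outside the table */
	if (cmd == P_CONNECTION_FEATURES)
		return "ConnectionFeatures";
	if (cmd >= ARRAY_SIZE(cmdnames) || !cmdnames[cmd])
		return "Unknown";
	return cmdnames[cmd];
}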
@@ -337,37 +293,24 @@ struct p_header80 {
 	u32	  magic;
 	u16	  command;
 	u16	  length;	/* bytes of data after this header */
-	u8	  payload[0];
 } __packed;
 
 /* Header for big packets, Used for data packets exceeding 64kB */
 struct p_header95 {
 	u16	  magic;	/* use DRBD_MAGIC_BIG here */
 	u16	  command;
-	u32	  length;	/* Use only 24 bits of that. Ignore the highest 8 bit. */
-	u8	  payload[0];
+	u32	  length;
 } __packed;
 
-union p_header {
-	struct p_header80 h80;
-	struct p_header95 h95;
-};
-
-/*
- * short commands, packets without payload, plain p_header:
- *   P_PING
- *   P_PING_ACK
- *   P_BECOME_SYNC_TARGET
- *   P_BECOME_SYNC_SOURCE
- *   P_UNPLUG_REMOTE
- */
+struct p_header100 {
+	u32	  magic;
+	u16	  volume;
+	u16	  command;
+	u32	  length;
+	u32	  pad;
+} __packed;
 
-/*
- * commands with out-of-struct payload:
- *   P_BITMAP    (no additional fields)
- *   P_DATA, P_DATA_REPLY (see p_data)
- *   P_COMPRESSED_BITMAP (see receive_compressed_bitmap)
- */
+extern unsigned int drbd_header_size(struct drbd_tconn *tconn);
 
 /* these defines must not be changed without changing the protocol version */
 #define DP_HARDBARRIER	      1 /* depricated */
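
With three header layouts in play, the space to reserve in front of a packet now depends on the negotiated protocol version, which is what drbd_header_size() decides. A plausible sketch (the version cut-off is an assumption; the sizeof values follow from the struct definitions above):

/* Sketch only: header size by negotiated protocol version.
 * The exact version threshold is an assumption, not taken from this diff. */
unsigned int drbd_header_size(struct drbd_tconn *tconn)
{
	if (tconn->agreed_pro_version >= 100)
		return sizeof(struct p_header100);	/* 16 bytes, carries a volume number */
	else
		return sizeof(struct p_header80);	/* 8 bytes; p_header95 is the same size */
}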
@@ -377,9 +320,10 @@ union p_header {
 #define DP_FUA               16 /* equals REQ_FUA     */
 #define DP_FLUSH             32 /* equals REQ_FLUSH   */
 #define DP_DISCARD           64 /* equals REQ_DISCARD */
+#define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */
+#define DP_SEND_WRITE_ACK   256 /* This is a proto C write request */
 
 struct p_data {
-	union p_header head;
 	u64	    sector;    /* 64 bits sector number */
 	u64	    block_id;  /* to identify the request in protocol B&C */
 	u32	    seq_num;
@@ -390,21 +334,18 @@ struct p_data {
  * commands which share a struct:
  *   p_block_ack:
  *   P_RECV_ACK (proto B), P_WRITE_ACK (proto C),
- *   P_DISCARD_ACK (proto C, two-primaries conflict detection)
+ *   P_SUPERSEDED (proto C, two-primaries conflict detection)
  *   p_block_req:
  *   P_DATA_REQUEST, P_RS_DATA_REQUEST
  */
 struct p_block_ack {
-	struct p_header80 head;
 	u64	    sector;
 	u64	    block_id;
 	u32	    blksize;
 	u32	    seq_num;
 } __packed;
 
-
 struct p_block_req {
-	struct p_header80 head;
 	u64 sector;
 	u64 block_id;
 	u32 blksize;
@@ -413,59 +354,52 @@ struct p_block_req {
 
 /*
  * commands with their own struct for additional fields:
- *   P_HAND_SHAKE
+ *   P_CONNECTION_FEATURES
  *   P_BARRIER
 *   P_BARRIER_ACK
 *   P_SYNC_PARAM
 *   ReportParams
 */
 
-struct p_handshake {
-	struct p_header80 head;	/* 8 bytes */
+struct p_connection_features {
 	u32 protocol_min;
 	u32 feature_flags;
 	u32 protocol_max;
 
 	/* should be more than enough for future enhancements
-	 * for now, feature_flags and the reserverd array shall be zero.
+	 * for now, feature_flags and the reserved array shall be zero.
 	 */
 
 	u32 _pad;
-	u64 reserverd[7];
+	u64 reserved[7];
 } __packed;
-/* 80 bytes, FIXED for the next century */
 
 struct p_barrier {
-	struct p_header80 head;
 	u32 barrier;	/* barrier number _handle_ only */
 	u32 pad;	/* to multiple of 8 Byte */
 } __packed;
 
 struct p_barrier_ack {
-	struct p_header80 head;
 	u32 barrier;
 	u32 set_size;
 } __packed;
 
 struct p_rs_param {
-	struct p_header80 head;
-	u32 rate;
+	u32 resync_rate;
 
 	      /* Since protocol version 88 and higher. */
 	char verify_alg[0];
 } __packed;
 
 struct p_rs_param_89 {
-	struct p_header80 head;
-	u32 rate;
+	u32 resync_rate;
 	/* protocol version 89: */
 	char verify_alg[SHARED_SECRET_MAX];
 	char csums_alg[SHARED_SECRET_MAX];
 } __packed;
 
 struct p_rs_param_95 {
-	struct p_header80 head;
-	u32 rate;
+	u32 resync_rate;
 	char verify_alg[SHARED_SECRET_MAX];
 	char csums_alg[SHARED_SECRET_MAX];
 	u32 c_plan_ahead;
@@ -475,12 +409,11 @@ struct p_rs_param_95 {
 } __packed;
 
 enum drbd_conn_flags {
-	CF_WANT_LOSE = 1,
+	CF_DISCARD_MY_DATA = 1,
 	CF_DRY_RUN = 2,
 };
 
 struct p_protocol {
-	struct p_header80 head;
 	u32 protocol;
 	u32 after_sb_0p;
 	u32 after_sb_1p;
@@ -494,17 +427,14 @@ struct p_protocol {
 } __packed;
 
 struct p_uuids {
-	struct p_header80 head;
 	u64 uuid[UI_EXTENDED_SIZE];
 } __packed;
 
 struct p_rs_uuid {
-	struct p_header80 head;
 	u64	    uuid;
 } __packed;
 
 struct p_sizes {
-	struct p_header80 head;
 	u64	    d_size;  /* size of disk */
 	u64	    u_size;  /* user requested size */
 	u64	    c_size;  /* current exported size */
@@ -514,18 +444,15 @@ struct p_sizes {
 } __packed;
 
 struct p_state {
-	struct p_header80 head;
 	u32	    state;
 } __packed;
 
 struct p_req_state {
-	struct p_header80 head;
 	u32	    mask;
 	u32	    val;
 } __packed;
 
 struct p_req_state_reply {
-	struct p_header80 head;
 	u32	    retcode;
 } __packed;
 
@@ -539,15 +466,7 @@ struct p_drbd06_param {
 	u32	  bit_map_gen[5];
 } __packed;
 
-struct p_discard {
-	struct p_header80 head;
-	u64	    block_id;
-	u32	    seq_num;
-	u32	    pad;
-} __packed;
-
 struct p_block_desc {
-	struct p_header80 head;
 	u64 sector;
 	u32 blksize;
 	u32 pad;	/* to multiple of 8 Byte */
@@ -563,7 +482,6 @@ enum drbd_bitmap_code {
 };
 
 struct p_compressed_bm {
-	struct p_header80 head;
 	/* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
 	 * (encoding & 0x80): polarity (set/unset) of first runlength
 	 * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
@@ -575,90 +493,22 @@ struct p_compressed_bm {
 } __packed;
 
 struct p_delay_probe93 {
-	struct p_header80 head;
 	u32     seq_num; /* sequence number to match the two probe packets */
 	u32     offset;  /* usecs the probe got sent after the reference time point */
 } __packed;
 
-/* DCBP: Drbd Compressed Bitmap Packet ... */
-static inline enum drbd_bitmap_code
-DCBP_get_code(struct p_compressed_bm *p)
-{
-	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
-}
-
-static inline void
-DCBP_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code)
-{
-	BUG_ON(code & ~0xf);
-	p->encoding = (p->encoding & ~0xf) | code;
-}
-
-static inline int
-DCBP_get_start(struct p_compressed_bm *p)
-{
-	return (p->encoding & 0x80) != 0;
-}
-
-static inline void
-DCBP_set_start(struct p_compressed_bm *p, int set)
-{
-	p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0);
-}
-
-static inline int
-DCBP_get_pad_bits(struct p_compressed_bm *p)
-{
-	return (p->encoding >> 4) & 0x7;
-}
-
-static inline void
-DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
-{
-	BUG_ON(n & ~0x7);
-	p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4);
-}
-
-/* one bitmap packet, including the p_header,
- * should fit within one _architecture independend_ page.
- * so we need to use the fixed size 4KiB page size
- * most architectures have used for a long time.
+/*
+ * Bitmap packets need to fit within a single page on the sender and receiver,
+ * so we are limited to 4 KiB (and not to PAGE_SIZE, which can be bigger).
  */
-#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header80))
-#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long))
-#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm))
-#if (PAGE_SIZE < 4096)
-/* drbd_send_bitmap / receive_bitmap would break horribly */
-#error "PAGE_SIZE too small"
-#endif
-
-union p_polymorph {
-	union p_header header;
-	struct p_handshake handshake;
-	struct p_data data;
-	struct p_block_ack block_ack;
-	struct p_barrier barrier;
-	struct p_barrier_ack barrier_ack;
-	struct p_rs_param_89 rs_param_89;
-	struct p_rs_param_95 rs_param_95;
-	struct p_protocol protocol;
-	struct p_sizes sizes;
-	struct p_uuids uuids;
-	struct p_state state;
-	struct p_req_state req_state;
-	struct p_req_state_reply req_state_reply;
-	struct p_block_req block_req;
-	struct p_delay_probe93 delay_probe93;
-	struct p_rs_uuid rs_uuid;
-	struct p_block_desc block_desc;
-} __packed;
+#define DRBD_SOCKET_BUFFER_SIZE 4096
 
 /**********************************************************************/
 enum drbd_thread_state {
-	None,
-	Running,
-	Exiting,
-	Restarting
+	NONE,
+	RUNNING,
+	EXITING,
+	RESTARTING
};
 
 struct drbd_thread {
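
Since payload structs no longer embed their header, the old compile-time BM_PACKET_* constants give way to run-time arithmetic against the fixed buffer size. A sketch of that calculation (the helper name is hypothetical):

/* Sketch: how many bitmap words fit into one 4 KiB send buffer.
 * num_words_per_bitmap_packet() is a hypothetical helper name. */
static unsigned int num_words_per_bitmap_packet(struct drbd_tconn *tconn)
{
	unsigned int header_size = drbd_header_size(tconn);

	return (DRBD_SOCKET_BUFFER_SIZE - header_size) / sizeof(unsigned long);
}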
@@ -667,8 +517,9 @@ struct drbd_thread {
 	struct completion stop;
 	enum drbd_thread_state t_state;
 	int (*function) (struct drbd_thread *);
-	struct drbd_conf *mdev;
+	struct drbd_tconn *tconn;
 	int reset_cpu_mask;
+	char name[9];
 };
 
 static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi)
@@ -681,58 +532,54 @@ static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi)
 	return thi->t_state;
 }
 
-struct drbd_work;
-typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel);
 struct drbd_work {
 	struct list_head list;
-	drbd_work_cb cb;
+	int (*cb)(struct drbd_work *, int cancel);
+	union {
+		struct drbd_conf *mdev;
+		struct drbd_tconn *tconn;
+	};
 };
 
-struct drbd_tl_epoch;
+#include "drbd_interval.h"
+
+extern int drbd_wait_misc(struct drbd_conf *, struct drbd_interval *);
+
 struct drbd_request {
 	struct drbd_work w;
-	struct drbd_conf *mdev;
 
 	/* if local IO is not allowed, will be NULL.
 	 * if local IO _is_ allowed, holds the locally submitted bio clone,
 	 * or, after local IO completion, the ERR_PTR(error).
-	 * see drbd_endio_pri(). */
+	 * see drbd_request_endio(). */
 	struct bio *private_bio;
 
-	struct hlist_node collision;
-	sector_t sector;
-	unsigned int size;
-	unsigned int epoch; /* barrier_nr */
+	struct drbd_interval i;
 
-	/* barrier_nr: used to check on "completion" whether this req was in
+	/* epoch: used to check on "completion" whether this req was in
 	 * the current epoch, and we therefore have to close it,
-	 * starting a new epoch...
+	 * causing a p_barrier packet to be send, starting a new epoch.
+	 *
+	 * This corresponds to "barrier" in struct p_barrier[_ack],
+	 * and to "barrier_nr" in struct drbd_epoch (and various
+	 * comments/function parameters/local variable names).
 	 */
+	unsigned int epoch;
 
 	struct list_head tl_requests; /* ring list in the transfer log */
 	struct bio *master_bio;       /* master bio pointer */
-	unsigned long rq_state; /* see comments above _req_mod() */
 	unsigned long start_time;
-};
 
-struct drbd_tl_epoch {
-	struct drbd_work w;
-	struct list_head requests; /* requests before */
-	struct drbd_tl_epoch *next; /* pointer to the next barrier */
-	unsigned int br_number;  /* the barriers identifier. */
-	int n_writes;	/* number of requests attached before this barrier */
-};
+	/* once it hits 0, we may complete the master_bio */
+	atomic_t completion_ref;
+	/* once it hits 0, we may destroy this drbd_request object */
+	struct kref kref;
 
-struct drbd_request;
-
-/* These Tl_epoch_entries may be in one of 6 lists:
-   active_ee .. data packet being written
-   sync_ee   .. syncer block being written
-   done_ee   .. block written, need to send P_WRITE_ACK
-   read_ee   .. [RS]P_DATA_REQUEST being read
-*/
+	unsigned rq_state; /* see comments above _req_mod() */
+};
 
 struct drbd_epoch {
+	struct drbd_tconn *tconn;
 	struct list_head list;
 	unsigned int barrier_nr;
 	atomic_t epoch_size; /* increased on every request added. */
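
Work callbacks no longer take an mdev argument; the drbd_work object itself carries either an mdev or a tconn, and a callback recovers its enclosing structure with container_of(). A minimal sketch of the new calling convention (w_example is hypothetical):

/* Sketch of the new work callback convention; w_example is hypothetical. */
static int w_example(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_conf *mdev = w->mdev;	/* from the union in struct drbd_work */

	if (cancel)
		return 0;	/* queue is being flushed; nothing to do */
	dev_info(DEV, "processing request at sector %llu\n",
		 (unsigned long long)req->i.sector);
	return 0;
}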
@@ -762,17 +609,14 @@ struct digest_info {
 	void *digest;
 };
 
-struct drbd_epoch_entry {
+struct drbd_peer_request {
 	struct drbd_work w;
-	struct hlist_node collision;
 	struct drbd_epoch *epoch; /* for writes */
-	struct drbd_conf *mdev;
 	struct page *pages;
 	atomic_t pending_bios;
-	unsigned int size;
+	struct drbd_interval i;
 	/* see comments on ee flag bits below */
 	unsigned long flags;
-	sector_t sector;
 	union {
 		u64 block_id;
 		struct digest_info *digest;
@@ -793,31 +637,37 @@ enum {
 	 * we need to resubmit without the barrier flag. */
 	__EE_RESUBMITTED,
 
-	/* we may have several bios per epoch entry.
+	/* we may have several bios per peer request.
 	 * if any of those fail, we set this flag atomically
 	 * from the endio callback */
 	__EE_WAS_ERROR,
 
 	/* This ee has a pointer to a digest instead of a block id */
 	__EE_HAS_DIGEST,
+
+	/* Conflicting local requests need to be restarted after this request */
+	__EE_RESTART_REQUESTS,
+
+	/* The peer wants a write ACK for this (wire proto C) */
+	__EE_SEND_WRITE_ACK,
+
+	/* Is set when net_conf had two_primaries set while creating this peer_req */
+	__EE_IN_INTERVAL_TREE,
 };
 #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_MAY_SET_IN_SYNC     (1<<__EE_MAY_SET_IN_SYNC)
 #define EE_RESUBMITTED         (1<<__EE_RESUBMITTED)
 #define EE_WAS_ERROR           (1<<__EE_WAS_ERROR)
 #define EE_HAS_DIGEST          (1<<__EE_HAS_DIGEST)
+#define EE_RESTART_REQUESTS	(1<<__EE_RESTART_REQUESTS)
+#define EE_SEND_WRITE_ACK	(1<<__EE_SEND_WRITE_ACK)
+#define EE_IN_INTERVAL_TREE	(1<<__EE_IN_INTERVAL_TREE)
 
-/* global flag bits */
-enum drbd_flag {
-	CREATE_BARRIER,		/* next P_DATA is preceded by a P_BARRIER */
-	SIGNAL_ASENDER,		/* whether asender wants to be interrupted */
-	SEND_PING,		/* whether asender should send a ping asap */
-
+/* flag bits per mdev */
+enum {
 	UNPLUG_REMOTE,		/* sending a "UnplugRemote" could help */
 	MD_DIRTY,		/* current uuids and flags not yet on disk */
-	DISCARD_CONCURRENT,	/* Set on one node, cleared on the peer! */
 	USE_DEGR_WFC_T,		/* degr-wfc-timeout instead of wfc-timeout. */
-	CLUSTER_ST_CHANGE,	/* Cluster wide state change going on... */
 	CL_ST_CHG_SUCCESS,
 	CL_ST_CHG_FAIL,
 	CRASHED_PRIMARY,	/* This node was a crashed primary.
@@ -835,33 +685,14 @@ enum drbd_flag {
 	WAS_READ_ERROR,		/* Local disk READ failed (set additionally to the above) */
 	FORCE_DETACH,		/* Force-detach from local disk, aborting any pending local IO */
 	RESYNC_AFTER_NEG,	/* Resync after online grow after the attach&negotiate finished. */
-	NET_CONGESTED,		/* The data socket is congested */
-
-	CONFIG_PENDING,		/* serialization of (re)configuration requests.
-				 * if set, also prevents the device from dying */
-	DEVICE_DYING,		/* device became unconfigured,
-				 * but worker thread is still handling the cleanup.
-				 * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed,
-				 * while this is set. */
 	RESIZE_PENDING,		/* Size change detected locally, waiting for the response from
 				 * the peer, if it changed there as well. */
-	CONN_DRY_RUN,		/* Expect disconnect after resync handshake. */
-	GOT_PING_ACK,		/* set when we receive a ping_ack packet, misc wait gets woken */
 	NEW_CUR_UUID,		/* Create new current UUID when thawing IO */
 	AL_SUSPENDED,		/* Activity logging is currently suspended. */
 	AHEAD_TO_SYNC_SOURCE,	/* Ahead -> SyncSource queued */
-	STATE_SENT,		/* Do not change state/UUIDs while this is set */
-
-	CALLBACK_PENDING,	/* Whether we have a call_usermodehelper(, UMH_WAIT_PROC)
-				 * pending, from drbd worker context.
-				 * If set, bdi_write_congested() returns true,
-				 * so shrink_page_list() would not recurse into,
-				 * and potentially deadlock on, this drbd worker.
-				 */
-	DISCONNECT_SENT, /* Currently the last bit in this 32bit word */
-
-	/* keep last */
-	DRBD_N_FLAGS,
+	B_RS_H_DONE,		/* Before resync handler done (already executed) */
+	DISCARD_MY_DATA,	/* discard_my_data flag per volume */
+	READ_BALANCE_RR,
 };
 
 struct drbd_bitmap; /* opaque for drbd_conf */
@@ -899,18 +730,17 @@ enum bm_flag {
 
 struct drbd_work_queue {
 	struct list_head q;
-	struct semaphore s; /* producers up it, worker down()s it */
 	spinlock_t q_lock;  /* to protect the list. */
+	wait_queue_head_t q_wait;
 };
 
 struct drbd_socket {
-	struct drbd_work_queue work;
 	struct mutex mutex;
 	struct socket    *socket;
 	/* this way we get our
 	 * send/receive buffers off the stack */
-	union p_polymorph sbuf;
-	union p_polymorph rbuf;
+	void *sbuf;
+	void *rbuf;
 };
 
 struct drbd_md {
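
The semaphore in drbd_work_queue is replaced by a wait queue: producers append to the list and wake the worker, which sleeps until work arrives. A sketch of the two sides of that pattern, assuming a single consumer thread (both function names are hypothetical):

/* Sketch of the producer/consumer pattern implied by the new drbd_work_queue;
 * function names are hypothetical. */
static void queue_work_item(struct drbd_work_queue *q, struct drbd_work *w)
{
	spin_lock_irq(&q->q_lock);
	list_add_tail(&w->list, &q->q);
	spin_unlock_irq(&q->q_lock);
	wake_up(&q->q_wait);				/* instead of up(&q->s) */
}

static struct drbd_work *dequeue_work_item(struct drbd_work_queue *q)
{
	struct drbd_work *w;

	wait_event(q->q_wait, !list_empty(&q->q));	/* instead of down(&q->s) */
	spin_lock_irq(&q->q_lock);
	w = list_first_entry(&q->q, struct drbd_work, list);
	list_del_init(&w->list);
	spin_unlock_irq(&q->q_lock);
	return w;
}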
@@ -927,24 +757,16 @@ struct drbd_md {
 	s32 bm_offset;	/* signed relative sector offset to bitmap */
 
 	/* u32 al_nr_extents;	   important for restoring the AL
-	 * is stored into  sync_conf.al_extents, which in turn
+	 * is stored into  ldev->dc.al_extents, which in turn
 	 * gets applied to act_log->nr_elements
 	 */
 };
 
-/* for sync_conf and other types... */
-#define NL_PACKET(name, number, fields) struct name { fields };
-#define NL_INTEGER(pn,pr,member) int member;
-#define NL_INT64(pn,pr,member) __u64 member;
-#define NL_BIT(pn,pr,member) unsigned member:1;
-#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len;
-#include <linux/drbd_nl.h>
-
 struct drbd_backing_dev {
 	struct block_device *backing_bdev;
 	struct block_device *md_bdev;
 	struct drbd_md md;
-	struct disk_conf dc; /* The user provided config... */
+	struct disk_conf *disk_conf; /* RCU, for updates: mdev->tconn->conf_update */
 	sector_t known_size; /* last known size of that backing device */
 };
 
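
disk_conf becomes an RCU-protected pointer instead of an embedded struct: readers dereference it inside an RCU read-side section, while updaters swap in a new copy under mdev->tconn->conf_update. A read-side sketch of the access pattern this implies:

/* Sketch of the RCU read-side access implied by the new disk_conf pointer. */
static int get_resync_rate(struct drbd_conf *mdev)
{
	int resync_rate;

	rcu_read_lock();
	resync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate;
	rcu_read_unlock();
	return resync_rate;
}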
@@ -968,17 +790,116 @@ enum write_ordering_e {
 };
 
 struct fifo_buffer {
-	int *values;
 	unsigned int head_index;
 	unsigned int size;
+	int total; /* sum of all values */
+	int values[0];
+};
+extern struct fifo_buffer *fifo_alloc(int fifo_size);
+
+/* flag bits per tconn */
+enum {
+	NET_CONGESTED,		/* The data socket is congested */
+	RESOLVE_CONFLICTS,	/* Set on one node, cleared on the peer! */
+	SEND_PING,		/* whether asender should send a ping asap */
+	SIGNAL_ASENDER,		/* whether asender wants to be interrupted */
+	GOT_PING_ACK,		/* set when we receive a ping_ack packet, ping_wait gets woken */
+	CONN_WD_ST_CHG_REQ,	/* A cluster wide state change on the connection is active */
+	CONN_WD_ST_CHG_OKAY,
+	CONN_WD_ST_CHG_FAIL,
+	CONN_DRY_RUN,		/* Expect disconnect after resync handshake. */
+	CREATE_BARRIER,		/* next P_DATA is preceded by a P_BARRIER */
+	STATE_SENT,		/* Do not change state/UUIDs while this is set */
+	CALLBACK_PENDING,	/* Whether we have a call_usermodehelper(, UMH_WAIT_PROC)
+				 * pending, from drbd worker context.
+				 * If set, bdi_write_congested() returns true,
+				 * so shrink_page_list() would not recurse into,
+				 * and potentially deadlock on, this drbd worker.
+				 */
+	DISCONNECT_SENT,
+};
+
+struct drbd_tconn {			/* is a resource from the config file */
+	char *name;			/* Resource name */
+	struct list_head all_tconn;	/* linked on global drbd_tconns */
+	struct kref kref;
+	struct idr volumes;		/* <tconn, vnr> to mdev mapping */
+	enum drbd_conns cstate;		/* Only C_STANDALONE to C_WF_REPORT_PARAMS */
+	unsigned susp:1;		/* IO suspended by user */
+	unsigned susp_nod:1;		/* IO suspended because no data */
+	unsigned susp_fen:1;		/* IO suspended because fence peer handler runs */
+	struct mutex cstate_mutex;	/* Protects graceful disconnects */
+
+	unsigned long flags;
+	struct net_conf *net_conf;	/* content protected by rcu */
+	struct mutex conf_update;	/* mutex for ready-copy-update of net_conf and disk_conf */
+	wait_queue_head_t ping_wait;	/* Woken upon reception of a ping, and a state change */
+	struct res_opts res_opts;
+
+	struct sockaddr_storage my_addr;
+	int my_addr_len;
+	struct sockaddr_storage peer_addr;
+	int peer_addr_len;
+
+	struct drbd_socket data;	/* data/barrier/cstate/parameter packets */
+	struct drbd_socket meta;	/* ping/ack (metadata) packets */
+	int agreed_pro_version;		/* actually used protocol version */
+	unsigned long last_received;	/* in jiffies, either socket */
+	unsigned int ko_count;
+
+	spinlock_t req_lock;
+
+	struct list_head transfer_log;	/* all requests not yet fully processed */
+
+	struct crypto_hash *cram_hmac_tfm;
+	struct crypto_hash *integrity_tfm;  /* checksums we compute, updates protected by tconn->data->mutex */
+	struct crypto_hash *peer_integrity_tfm;  /* checksums we verify, only accessed from receiver thread */
+	struct crypto_hash *csums_tfm;
+	struct crypto_hash *verify_tfm;
+	void *int_dig_in;
+	void *int_dig_vv;
+
+	/* receiver side */
+	struct drbd_epoch *current_epoch;
+	spinlock_t epoch_lock;
+	unsigned int epochs;
+	enum write_ordering_e write_ordering;
+	atomic_t current_tle_nr;	/* transfer log epoch number */
+	unsigned current_tle_writes;	/* writes seen within this tl epoch */
+
+	unsigned long last_reconnect_jif;
+	struct drbd_thread receiver;
+	struct drbd_thread worker;
+	struct drbd_thread asender;
+	cpumask_var_t cpu_mask;
+
+	/* sender side */
+	struct drbd_work_queue sender_work;
+
+	struct {
+		/* whether this sender thread
+		 * has processed a single write yet. */
+		bool seen_any_write_yet;
+
+		/* Which barrier number to send with the next P_BARRIER */
+		int current_epoch_nr;
+
+		/* how many write requests have been sent
+		 * with req->epoch == current_epoch_nr.
+		 * If none, no P_BARRIER will be sent. */
+		unsigned current_epoch_writes;
+	} send;
 };
 
 struct drbd_conf {
-	unsigned long drbd_flags[(DRBD_N_FLAGS + BITS_PER_LONG -1)/BITS_PER_LONG];
+	struct drbd_tconn *tconn;
+	int vnr;			/* volume number within the connection */
+	struct kref kref;
+
+	/* things that are stored as / read from meta data on disk */
+	unsigned long flags;
 
 	/* configured by drbdsetup */
-	struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */
-	struct syncer_conf sync_conf;
 	struct drbd_backing_dev *ldev __protected_by(local);
 
 	sector_t p_size;     /* partner's disk size */
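
With the flexible values[0] array, a fifo_buffer and its values are one allocation sized at run time, which is what fifo_alloc() provides. A plausible sketch of it (the real implementation lives in a .c file; placement and GFP flags are assumptions):

/* Sketch of fifo_alloc(); placement and GFP flags are assumptions. */
struct fifo_buffer *fifo_alloc(int fifo_size)
{
	struct fifo_buffer *fb;

	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size,
		     GFP_NOIO);
	if (!fb)
		return NULL;
	fb->head_index = 0;
	fb->size = fifo_size;
	fb->total = 0;
	return fb;
}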
@@ -986,11 +907,7 @@ struct drbd_conf {
 	struct block_device *this_bdev;
 	struct gendisk	    *vdisk;
 
-	struct drbd_socket data; /* data/barrier/cstate/parameter packets */
-	struct drbd_socket meta; /* ping/ack (metadata) packets */
-	int agreed_pro_version;  /* actually used protocol version */
-	unsigned long last_received; /* in jiffies, either socket */
-	unsigned int ko_count;
+	unsigned long last_reattach_jif;
 	struct drbd_work  resync_work,
 			  unplug_work,
 			  go_diskless,
@@ -1010,10 +927,9 @@ struct drbd_conf {
 	/* Used after attach while negotiating new disk state. */
 	union drbd_state new_state_tmp;
 
-	union drbd_state state;
+	union drbd_dev_state state;
 	wait_queue_head_t misc_wait;
 	wait_queue_head_t state_wait;  /* upon each state change. */
-	wait_queue_head_t net_cnt_wait;
 	unsigned int send_cnt;
 	unsigned int recv_cnt;
 	unsigned int read_cnt;
@@ -1023,17 +939,12 @@ struct drbd_conf {
 	atomic_t ap_bio_cnt;	 /* Requests we need to complete */
 	atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
 	atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
-	atomic_t unacked_cnt;	 /* Need to send replys for */
+	atomic_t unacked_cnt;	 /* Need to send replies for */
 	atomic_t local_cnt;	 /* Waiting for local completion */
-	atomic_t net_cnt;	 /* Users of net_conf */
-	spinlock_t req_lock;
-	struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */
-	struct drbd_tl_epoch *newest_tle;
-	struct drbd_tl_epoch *oldest_tle;
-	struct list_head out_of_sequence_requests;
-	struct list_head barrier_acked_requests;
-	struct hlist_head *tl_hash;
-	unsigned int tl_hash_s;
+
+	/* Interval tree of pending local requests */
+	struct rb_root read_requests;
+	struct rb_root write_requests;
 
 	/* blocks to resync in this run [unit BM_BLOCK_SIZE] */
 	unsigned long rs_total;
@@ -1053,6 +964,7 @@ struct drbd_conf {
 	unsigned long rs_mark_time[DRBD_SYNC_MARKS];
 	/* current index into rs_mark_{left,time} */
 	int rs_last_mark;
+	unsigned long rs_last_bcast; /* [unit jiffies] */
 
 	/* where does the admin want us to start? (sector) */
 	sector_t ov_start_sector;
@@ -1064,14 +976,7 @@ struct drbd_conf {
 	/* size of out-of-sync range in sectors. */
 	sector_t ov_last_oos_size;
 	unsigned long ov_left; /* in bits */
-	struct crypto_hash *csums_tfm;
-	struct crypto_hash *verify_tfm;
 
-	unsigned long last_reattach_jif;
-	unsigned long last_reconnect_jif;
-	struct drbd_thread receiver;
-	struct drbd_thread worker;
-	struct drbd_thread asender;
 	struct drbd_bitmap *bitmap;
 	unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */
 
@@ -1084,29 +989,19 @@ struct drbd_conf {
 
 	int open_cnt;
 	u64 *p_uuid;
-	struct drbd_epoch *current_epoch;
-	spinlock_t epoch_lock;
-	unsigned int epochs;
-	enum write_ordering_e write_ordering;
+
 	struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */
 	struct list_head sync_ee;   /* IO in progress (P_RS_DATA_REPLY gets written to disk) */
-	struct list_head done_ee;   /* send ack */
-	struct list_head read_ee;   /* IO in progress (any read) */
+	struct list_head done_ee;   /* need to send P_WRITE_ACK */
+	struct list_head read_ee;   /* [RS]P_DATA_REQUEST being read */
 	struct list_head net_ee;    /* zero-copy network send in progress */
-	struct hlist_head *ee_hash; /* is proteced by req_lock! */
-	unsigned int ee_hash_s;
-
-	/* this one is protected by ee_lock, single thread */
-	struct drbd_epoch_entry *last_write_w_barrier;
 
 	int next_barrier_nr;
-	struct hlist_head *app_reads_hash; /* is proteced by req_lock */
 	struct list_head resync_reads;
 	atomic_t pp_in_use;		/* allocated from page pool */
 	atomic_t pp_in_use_by_net;	/* sendpage()d, still referenced by tcp */
 	wait_queue_head_t ee_wait;
 	struct page *md_io_page;	/* one page buffer for md_io */
-	struct page *md_io_tmpp;	/* for logical_block_size != 512 */
 	struct drbd_md_io md_io;
 	atomic_t md_io_in_use;		/* protects the md_io, md_io_page and md_io_tmpp */
 	spinlock_t al_lock;
@@ -1115,22 +1010,16 @@ struct drbd_conf {
 	unsigned int al_tr_number;
 	int al_tr_cycle;
 	int al_tr_pos;   /* position of the next transaction in the journal */
-	struct crypto_hash *cram_hmac_tfm;
-	struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */
-	struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */
-	void *int_dig_out;
-	void *int_dig_in;
-	void *int_dig_vv;
 	wait_queue_head_t seq_wait;
 	atomic_t packet_seq;
 	unsigned int peer_seq;
 	spinlock_t peer_seq_lock;
 	unsigned int minor;
 	unsigned long comm_bm_set; /* communicated number of set bits. */
-	cpumask_var_t cpu_mask;
 	struct bm_io_work bm_io_work;
 	u64 ed_uuid; /* UUID of the exposed data */
-	struct mutex state_mutex;
+	struct mutex own_state_mutex;
+	struct mutex *state_mutex; /* either own_state_mutex or mdev->tconn->cstate_mutex */
 	char congestion_reason;  /* Why we where congested... */
 	atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */
 	atomic_t rs_sect_ev; /* for submitted resync data rate, both */
@@ -1138,46 +1027,16 @@ struct drbd_conf {
 	int rs_last_events;  /* counter of read or write "events" (unit sectors)
 			      * on the lower level device when we last looked. */
 	int c_sync_rate; /* current resync rate after syncer throttle magic */
-	struct fifo_buffer rs_plan_s; /* correction values of resync planer */
+	struct fifo_buffer *rs_plan_s; /* correction values of resync planer (RCU, tconn->conn_update) */
 	int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
-	int rs_planed;    /* resync sectors already planned */
 	atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
 	unsigned int peer_max_bio_size;
 	unsigned int local_max_bio_size;
 };
 
-static inline void drbd_set_flag(struct drbd_conf *mdev, enum drbd_flag f)
-{
-	set_bit(f, &mdev->drbd_flags[0]);
-}
-
-static inline void drbd_clear_flag(struct drbd_conf *mdev, enum drbd_flag f)
-{
-	clear_bit(f, &mdev->drbd_flags[0]);
-}
-
-static inline int drbd_test_flag(struct drbd_conf *mdev, enum drbd_flag f)
-{
-	return test_bit(f, &mdev->drbd_flags[0]);
-}
-
-static inline int drbd_test_and_set_flag(struct drbd_conf *mdev, enum drbd_flag f)
-{
-	return test_and_set_bit(f, &mdev->drbd_flags[0]);
-}
-
-static inline int drbd_test_and_clear_flag(struct drbd_conf *mdev, enum drbd_flag f)
-{
-	return test_and_clear_bit(f, &mdev->drbd_flags[0]);
-}
-
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
 {
-	struct drbd_conf *mdev;
-
-	mdev = minor < minor_count ? minor_table[minor] : NULL;
-
-	return mdev;
+	return (struct drbd_conf *)idr_find(&minors, minor);
 }
 
 static inline unsigned int mdev_to_minor(struct drbd_conf *mdev)
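
minor_to_mdev() now consults the global idr declared earlier (RCU, updates under genl_lock()) instead of the fixed minor_table array; the same structure also supports walking all devices. A sketch of such a walk (caller context and locking are assumptions):

/* Sketch: walking all minors via the global idr.
 * Assumes the caller holds rcu_read_lock() or genl_lock(), per the
 * comment on the "minors" declaration above. */
static void walk_all_minors(void)
{
	struct drbd_conf *mdev;
	int minor;

	idr_for_each_entry(&minors, mdev, minor)
		dev_info(DEV, "volume %d of resource %s\n",
			 mdev->vnr, mdev->tconn->name);
}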
@@ -1185,29 +1044,9 @@ static inline unsigned int mdev_to_minor(struct drbd_conf *mdev)
 	return mdev->minor;
 }
 
-/* returns 1 if it was successful,
- * returns 0 if there was no data socket.
- * so wherever you are going to use the data.socket, e.g. do
- * if (!drbd_get_data_sock(mdev))
- *	return 0;
- * CODE();
- * drbd_put_data_sock(mdev);
- */
-static inline int drbd_get_data_sock(struct drbd_conf *mdev)
-{
-	mutex_lock(&mdev->data.mutex);
-	/* drbd_disconnect() could have called drbd_free_sock()
-	 * while we were waiting in down()... */
-	if (unlikely(mdev->data.socket == NULL)) {
-		mutex_unlock(&mdev->data.mutex);
-		return 0;
-	}
-	return 1;
-}
-
-static inline void drbd_put_data_sock(struct drbd_conf *mdev)
+static inline struct drbd_conf *vnr_to_mdev(struct drbd_tconn *tconn, int vnr)
 {
-	mutex_unlock(&mdev->data.mutex);
+	return (struct drbd_conf *)idr_find(&tconn->volumes, vnr);
 }
 
 /*
@@ -1216,99 +1055,69 @@ static inline void drbd_put_data_sock(struct drbd_conf *mdev)
1216 1055
1217/* drbd_main.c */ 1056/* drbd_main.c */
1218 1057
1219enum chg_state_flags {
1220 CS_HARD = 1,
1221 CS_VERBOSE = 2,
1222 CS_WAIT_COMPLETE = 4,
1223 CS_SERIALIZE = 8,
1224 CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE,
1225};
1226
1227enum dds_flags { 1058enum dds_flags {
1228 DDSF_FORCED = 1, 1059 DDSF_FORCED = 1,
1229 DDSF_NO_RESYNC = 2, /* Do not run a resync for the new space */ 1060 DDSF_NO_RESYNC = 2, /* Do not run a resync for the new space */
1230}; 1061};
1231 1062
1232extern void drbd_init_set_defaults(struct drbd_conf *mdev); 1063extern void drbd_init_set_defaults(struct drbd_conf *mdev);
1233extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev,
1234 enum chg_state_flags f,
1235 union drbd_state mask,
1236 union drbd_state val);
1237extern void drbd_force_state(struct drbd_conf *, union drbd_state,
1238 union drbd_state);
1239extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *,
1240 union drbd_state,
1241 union drbd_state,
1242 enum chg_state_flags);
1243extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state,
1244 enum chg_state_flags,
1245 struct completion *done);
1246extern void print_st_err(struct drbd_conf *, union drbd_state,
1247 union drbd_state, int);
1248extern int drbd_thread_start(struct drbd_thread *thi); 1064extern int drbd_thread_start(struct drbd_thread *thi);
1249extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); 1065extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait);
1066extern char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task);
1250#ifdef CONFIG_SMP 1067#ifdef CONFIG_SMP
1251extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev); 1068extern void drbd_thread_current_set_cpu(struct drbd_thread *thi);
1252extern void drbd_calc_cpu_mask(struct drbd_conf *mdev); 1069extern void drbd_calc_cpu_mask(struct drbd_tconn *tconn);
1253#else 1070#else
1254#define drbd_thread_current_set_cpu(A) ({}) 1071#define drbd_thread_current_set_cpu(A) ({})
1255#define drbd_calc_cpu_mask(A) ({}) 1072#define drbd_calc_cpu_mask(A) ({})
1256#endif 1073#endif
1257extern void drbd_free_resources(struct drbd_conf *mdev); 1074extern void tl_release(struct drbd_tconn *, unsigned int barrier_nr,
1258extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
1259 unsigned int set_size); 1075 unsigned int set_size);
1260extern void tl_clear(struct drbd_conf *mdev); 1076extern void tl_clear(struct drbd_tconn *);
1261extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); 1077extern void drbd_free_sock(struct drbd_tconn *tconn);
1262extern void drbd_free_sock(struct drbd_conf *mdev); 1078extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock,
1263extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, 1079 void *buf, size_t size, unsigned msg_flags);
1264 void *buf, size_t size, unsigned msg_flags); 1080extern int drbd_send_all(struct drbd_tconn *, struct socket *, void *, size_t,
1265extern int drbd_send_protocol(struct drbd_conf *mdev); 1081 unsigned);
1082
1083extern int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd);
1084extern int drbd_send_protocol(struct drbd_tconn *tconn);
1266extern int drbd_send_uuids(struct drbd_conf *mdev); 1085extern int drbd_send_uuids(struct drbd_conf *mdev);
1267extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); 1086extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev);
1268extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); 1087extern void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev);
1269extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); 1088extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags);
1270extern int drbd_send_state(struct drbd_conf *mdev, union drbd_state s); 1089extern int drbd_send_state(struct drbd_conf *mdev, union drbd_state s);
1271extern int drbd_send_current_state(struct drbd_conf *mdev); 1090extern int drbd_send_current_state(struct drbd_conf *mdev);
1272extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, 1091extern int drbd_send_sync_param(struct drbd_conf *mdev);
1273 enum drbd_packets cmd, struct p_header80 *h, 1092extern void drbd_send_b_ack(struct drbd_tconn *tconn, u32 barrier_nr,
1274 size_t size, unsigned msg_flags); 1093 u32 set_size);
1275#define USE_DATA_SOCKET 1 1094extern int drbd_send_ack(struct drbd_conf *, enum drbd_packet,
1276#define USE_META_SOCKET 0 1095 struct drbd_peer_request *);
1277extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, 1096extern void drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd,
1278 enum drbd_packets cmd, struct p_header80 *h, 1097 struct p_block_req *rp);
1279 size_t size); 1098extern void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd,
1280extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, 1099 struct p_data *dp, int data_size);
1281 char *data, size_t size); 1100extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd,
1282extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc);
1283extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr,
1284 u32 set_size);
1285extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
1286 struct drbd_epoch_entry *e);
1287extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
1288 struct p_block_req *rp);
1289extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
1290 struct p_data *dp, int data_size);
1291extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
1292 sector_t sector, int blksize, u64 block_id); 1101 sector_t sector, int blksize, u64 block_id);
1293extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req); 1102extern int drbd_send_out_of_sync(struct drbd_conf *, struct drbd_request *);
1294extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, 1103extern int drbd_send_block(struct drbd_conf *, enum drbd_packet,
1295 struct drbd_epoch_entry *e); 1104 struct drbd_peer_request *);
1296extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); 1105extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req);
1297extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, 1106extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
1298 sector_t sector, int size, u64 block_id); 1107 sector_t sector, int size, u64 block_id);
1299extern int drbd_send_drequest_csum(struct drbd_conf *mdev, 1108extern int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector,
1300 sector_t sector,int size, 1109 int size, void *digest, int digest_size,
1301 void *digest, int digest_size, 1110 enum drbd_packet cmd);
1302 enum drbd_packets cmd);
1303extern int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size); 1111extern int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size);
1304 1112
1305extern int drbd_send_bitmap(struct drbd_conf *mdev); 1113extern int drbd_send_bitmap(struct drbd_conf *mdev);
1306extern int _drbd_send_bitmap(struct drbd_conf *mdev); 1114extern void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode);
1307extern int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode); 1115extern void conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode);
1308extern void drbd_free_bc(struct drbd_backing_dev *ldev); 1116extern void drbd_free_bc(struct drbd_backing_dev *ldev);
1309extern void drbd_mdev_cleanup(struct drbd_conf *mdev); 1117extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
1310void drbd_print_uuids(struct drbd_conf *mdev, const char *text); 1118void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
1311 1119
1120extern void conn_md_sync(struct drbd_tconn *tconn);
1312extern void drbd_md_sync(struct drbd_conf *mdev); 1121extern void drbd_md_sync(struct drbd_conf *mdev);
1313extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); 1122extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
1314extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); 1123extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
@@ -1334,33 +1143,52 @@ extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
1334extern int drbd_bitmap_io(struct drbd_conf *mdev, 1143extern int drbd_bitmap_io(struct drbd_conf *mdev,
1335 int (*io_fn)(struct drbd_conf *), 1144 int (*io_fn)(struct drbd_conf *),
1336 char *why, enum bm_flag flags); 1145 char *why, enum bm_flag flags);
1146extern int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
1147 int (*io_fn)(struct drbd_conf *),
1148 char *why, enum bm_flag flags);
1337extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); 1149extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
1338extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); 1150extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
1339extern void drbd_go_diskless(struct drbd_conf *mdev); 1151extern void drbd_go_diskless(struct drbd_conf *mdev);
1340extern void drbd_ldev_destroy(struct drbd_conf *mdev); 1152extern void drbd_ldev_destroy(struct drbd_conf *mdev);
1341 1153
1342
1343/* Meta data layout 1154/* Meta data layout
1344 We reserve a 128MB Block (4k aligned) 1155 We reserve a 128MB Block (4k aligned)
1345 * either at the end of the backing device 1156 * either at the end of the backing device
1346 * or on a separate meta data device. */ 1157 * or on a separate meta data device. */
1347 1158
1348#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */
1349/* The following numbers are sectors */ 1159/* The following numbers are sectors */
1350#define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */ 1160/* Allows up to about 3.8TB, so if you want more,
1351#define MD_AL_MAX_SIZE 64 /* = 32 kb LOG ~ 3776 extents ~ 14 GB Storage */ 1161 * you need to use the "flexible" meta data format. */
1352/* Allows up to about 3.8TB */ 1162#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */
1353#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE) 1163#define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */
1354 1164#define MD_AL_SECTORS 64 /* = 32 kB on disk activity log ring buffer */
1355/* Since the smallest IO unit is usually 512 byte */ 1165#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_SECTORS)
1356#define MD_SECTOR_SHIFT 9 1166
1357#define MD_SECTOR_SIZE (1<<MD_SECTOR_SHIFT) 1167/* we do all meta data IO in 4k blocks */
1358 1168#define MD_BLOCK_SHIFT 12
1359/* activity log */ 1169#define MD_BLOCK_SIZE (1<<MD_BLOCK_SHIFT)
1360#define AL_EXTENTS_PT ((MD_SECTOR_SIZE-12)/8-1) /* 61 ; Extents per 512B sector */ 1170
1361#define AL_EXTENT_SHIFT 22 /* One extent represents 4M Storage */ 1171/* One activity log extent represents 4M of storage */
1172#define AL_EXTENT_SHIFT 22
1362#define AL_EXTENT_SIZE (1<<AL_EXTENT_SHIFT) 1173#define AL_EXTENT_SIZE (1<<AL_EXTENT_SHIFT)
1363 1174
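Taken together, the reworked constants fix the order of things inside the
reserved meta data area. A sketch of the implied layout, in 512-byte sectors
relative to the start of the area (that the first 8 sectors hold the DRBD
"super block" is an inference from the surrounding comments, not spelled out
here):

	/* implied on-disk layout of the 128 MB meta data area:
	 *   sector  0 ..  7   DRBD "super block" (assumed)
	 *   sector  8 .. 71   activity log ring buffer (MD_AL_SECTORS = 64, 32 kB)
	 *   sector 72 ..      on-disk bitmap
	 */
	typedef char assert_md_bm_offset[MD_BM_OFFSET == 72 ? 1 : -1];
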
1175/* We could make these currently hardcoded constants configurable
1176 * variables at create-md time (or even re-configurable at runtime?).
1177 * Which will require some more changes to the DRBD "super block"
1178 * and attach code.
1179 *
1180 * updates per transaction:
1181 * This many changes to the active set can be logged with one transaction.
1182 * This number is arbitrary.
1183 * context per transaction:
1184 * This many context extent numbers are logged with each transaction.
1185 * This number is resulting from the transaction block size (4k), the layout
1186 * of the transaction header, and the number of updates per transaction.
1187 * See drbd_actlog.c:struct al_transaction_on_disk
1188 * */
1189#define AL_UPDATES_PER_TRANSACTION 64 // arbitrary
1190#define AL_CONTEXT_PER_TRANSACTION 919 // (4096 - 36 - 6*64)/4
1191
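The 919 is exactly what the comment's formula yields: a 4k transaction block
minus a fixed header and the per-update slots leaves room for that many 4-byte
context extent numbers. A compile-time sketch of the arithmetic, assuming the
36-byte header and 6 bytes per update slot (a __be16 slot number plus a __be32
extent number) that the formula implies:

	/* 4096 - 36 (header) - 64 * 6 (update slots) = 3676 bytes,
	 * 3676 / 4 = 919 context extent numbers per transaction */
	typedef char assert_al_context[
		(4096 - 36 - 6 * 64) / 4 == AL_CONTEXT_PER_TRANSACTION ? 1 : -1];
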
1364#if BITS_PER_LONG == 32 1192#if BITS_PER_LONG == 32
1365#define LN2_BPL 5 1193#define LN2_BPL 5
1366#define cpu_to_lel(A) cpu_to_le32(A) 1194#define cpu_to_lel(A) cpu_to_le32(A)
@@ -1396,11 +1224,14 @@ struct bm_extent {
1396 1224
1397#define SLEEP_TIME (HZ/10) 1225#define SLEEP_TIME (HZ/10)
1398 1226
1399#define BM_BLOCK_SHIFT 12 /* 4k per bit */ 1227/* We do bitmap IO in units of 4k blocks.
1228 * We also still have a hardcoded 4k per bit relation. */
1229#define BM_BLOCK_SHIFT 12 /* 4k per bit */
1400#define BM_BLOCK_SIZE (1<<BM_BLOCK_SHIFT) 1230#define BM_BLOCK_SIZE (1<<BM_BLOCK_SHIFT)
1401/* (9+3) : 512 bytes @ 8 bits; representing 16M storage 1231/* mostly arbitrarily set the represented size of one bitmap extent,
1402 * per sector of on disk bitmap */ 1232 * aka resync extent, to 16 MiB (which is also 512 Byte worth of bitmap
1403#define BM_EXT_SHIFT (BM_BLOCK_SHIFT + MD_SECTOR_SHIFT + 3) /* = 24 */ 1233 * at 4k per bit resolution) */
1234#define BM_EXT_SHIFT 24 /* 16 MiB per resync extent */
1404#define BM_EXT_SIZE (1<<BM_EXT_SHIFT) 1235#define BM_EXT_SIZE (1<<BM_EXT_SHIFT)
1405 1236
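The 16 MiB and the 512 bytes in the comment above are the same constant seen
from both ends: one resync extent spans 2^(BM_EXT_SHIFT - BM_BLOCK_SHIFT) =
4096 bitmap bits, and 4096 bits at 8 per byte are 512 bytes of on-disk bitmap.
A small sketch (the helper names are illustrative, not part of the header):

	#define BM_BITS_PER_EXT  (1UL << (BM_EXT_SHIFT - BM_BLOCK_SHIFT)) /* 4096 */
	#define BM_BYTES_PER_EXT (BM_BITS_PER_EXT / 8)                    /* 512 */

	typedef char assert_bm_ext[BM_BYTES_PER_EXT == 512 ? 1 : -1];
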
1406#if (BM_EXT_SHIFT != 24) || (BM_BLOCK_SHIFT != 12) 1237#if (BM_EXT_SHIFT != 24) || (BM_BLOCK_SHIFT != 12)
@@ -1468,17 +1299,20 @@ struct bm_extent {
1468#endif 1299#endif
1469#endif 1300#endif
1470 1301
1471/* Sector shift value for the "hash" functions of tl_hash and ee_hash tables. 1302/* BIO_MAX_SIZE is 256 * PAGE_CACHE_SIZE,
1472 * With a value of 8 all IO in one 128K block make it to the same slot of the 1303 * so for typical PAGE_CACHE_SIZE of 4k, that is (1<<20) Byte.
1473 * hash table. */ 1304 * Since we may live in a mixed-platform cluster,
1474#define HT_SHIFT 8 1305 * we limit us to a platform agnostic constant here for now.
1475#define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT)) 1306 * A followup commit may allow even bigger BIO sizes,
1307 * once we thought that through. */
1308#define DRBD_MAX_BIO_SIZE (1U << 20)
1309#if DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
1310#error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
1311#endif
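
To spell the new limit out: BIO_MAX_SIZE is 256 pages per bio, so with the
typical 4k page it comes to 256 * 4096 = 1048576 bytes, exactly the 1 << 20
chosen here; the #error above only fires on a platform whose smaller page
size would drag BIO_MAX_SIZE below that.

	/* 256 pages/bio * 4096 bytes/page = 1 MiB = 1 << 20 */
	typedef char assert_bio_arith[256 * 4096 == (1 << 20) ? 1 : -1];
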
1476#define DRBD_MAX_BIO_SIZE_SAFE (1U << 12) /* Works always = 4k */ 1312#define DRBD_MAX_BIO_SIZE_SAFE (1U << 12) /* Works always = 4k */
1477 1313
1478#define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* The old header only allows packets up to 32Kib data */ 1314#define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* Header 80 only allows packets up to 32KiB data */
1479 1315#define DRBD_MAX_BIO_SIZE_P95 (1U << 17) /* Protocol 95 to 99 allows bios up to 128KiB */
1480/* Number of elements in the app_reads_hash */
1481#define APP_R_HSIZE 15
1482 1316
1483extern int drbd_bm_init(struct drbd_conf *mdev); 1317extern int drbd_bm_init(struct drbd_conf *mdev);
1484extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new_bits); 1318extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new_bits);
@@ -1500,11 +1334,11 @@ extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr);
1500extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); 1334extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);
1501extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local); 1335extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local);
1502extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); 1336extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
1337extern void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr);
1503extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); 1338extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
1339extern int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local);
1504extern int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local); 1340extern int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local);
1505extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local); 1341extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local);
1506extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
1507 unsigned long al_enr);
1508extern size_t drbd_bm_words(struct drbd_conf *mdev); 1342extern size_t drbd_bm_words(struct drbd_conf *mdev);
1509extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); 1343extern unsigned long drbd_bm_bits(struct drbd_conf *mdev);
1510extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); 1344extern sector_t drbd_bm_capacity(struct drbd_conf *mdev);
@@ -1529,7 +1363,7 @@ extern void drbd_bm_unlock(struct drbd_conf *mdev);
1529/* drbd_main.c */ 1363/* drbd_main.c */
1530 1364
1531extern struct kmem_cache *drbd_request_cache; 1365extern struct kmem_cache *drbd_request_cache;
1532extern struct kmem_cache *drbd_ee_cache; /* epoch entries */ 1366extern struct kmem_cache *drbd_ee_cache; /* peer requests */
1533extern struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ 1367extern struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
1534extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ 1368extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
1535extern mempool_t *drbd_request_mempool; 1369extern mempool_t *drbd_request_mempool;
@@ -1569,12 +1403,22 @@ extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
1569 1403
1570extern rwlock_t global_state_lock; 1404extern rwlock_t global_state_lock;
1571 1405
1572extern struct drbd_conf *drbd_new_device(unsigned int minor); 1406extern int conn_lowest_minor(struct drbd_tconn *tconn);
1573extern void drbd_free_mdev(struct drbd_conf *mdev); 1407enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr);
1408extern void drbd_minor_destroy(struct kref *kref);
1409
1410extern int set_resource_options(struct drbd_tconn *tconn, struct res_opts *res_opts);
1411extern struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts);
1412extern void conn_destroy(struct kref *kref);
1413struct drbd_tconn *conn_get_by_name(const char *name);
1414extern struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len,
1415 void *peer_addr, int peer_addr_len);
1416extern void conn_free_crypto(struct drbd_tconn *tconn);
1574 1417
1575extern int proc_details; 1418extern int proc_details;
1576 1419
1577/* drbd_req */ 1420/* drbd_req */
1421extern void __drbd_make_request(struct drbd_conf *, struct bio *, unsigned long);
1578extern void drbd_make_request(struct request_queue *q, struct bio *bio); 1422extern void drbd_make_request(struct request_queue *q, struct bio *bio);
1579extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); 1423extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req);
1580extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec); 1424extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec);
@@ -1582,10 +1426,11 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
1582 1426
1583 1427
1584/* drbd_nl.c */ 1428/* drbd_nl.c */
1429extern int drbd_msg_put_info(const char *info);
1585extern void drbd_suspend_io(struct drbd_conf *mdev); 1430extern void drbd_suspend_io(struct drbd_conf *mdev);
1586extern void drbd_resume_io(struct drbd_conf *mdev); 1431extern void drbd_resume_io(struct drbd_conf *mdev);
1587extern char *ppsize(char *buf, unsigned long long size); 1432extern char *ppsize(char *buf, unsigned long long size);
1588extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int); 1433extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int);
1589enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; 1434enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
1590extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); 1435extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
1591extern void resync_after_online_grow(struct drbd_conf *); 1436extern void resync_after_online_grow(struct drbd_conf *);
@@ -1593,13 +1438,14 @@ extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
1593extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, 1438extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
1594 enum drbd_role new_role, 1439 enum drbd_role new_role,
1595 int force); 1440 int force);
1596extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev); 1441extern bool conn_try_outdate_peer(struct drbd_tconn *tconn);
1597extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev); 1442extern void conn_try_outdate_peer_async(struct drbd_tconn *tconn);
1598extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); 1443extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
1599 1444
1600/* drbd_worker.c */ 1445/* drbd_worker.c */
1601extern int drbd_worker(struct drbd_thread *thi); 1446extern int drbd_worker(struct drbd_thread *thi);
1602extern int drbd_alter_sa(struct drbd_conf *mdev, int na); 1447enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor);
1448void drbd_resync_after_changed(struct drbd_conf *mdev);
1603extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side); 1449extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side);
1604extern void resume_next_sg(struct drbd_conf *mdev); 1450extern void resume_next_sg(struct drbd_conf *mdev);
1605extern void suspend_other_sg(struct drbd_conf *mdev); 1451extern void suspend_other_sg(struct drbd_conf *mdev);
@@ -1608,13 +1454,13 @@ extern int drbd_resync_finished(struct drbd_conf *mdev);
1608extern void *drbd_md_get_buffer(struct drbd_conf *mdev); 1454extern void *drbd_md_get_buffer(struct drbd_conf *mdev);
1609extern void drbd_md_put_buffer(struct drbd_conf *mdev); 1455extern void drbd_md_put_buffer(struct drbd_conf *mdev);
1610extern int drbd_md_sync_page_io(struct drbd_conf *mdev, 1456extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
1611 struct drbd_backing_dev *bdev, sector_t sector, int rw); 1457 struct drbd_backing_dev *bdev, sector_t sector, int rw);
1458extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int);
1612extern void wait_until_done_or_force_detached(struct drbd_conf *mdev, 1459extern void wait_until_done_or_force_detached(struct drbd_conf *mdev,
1613 struct drbd_backing_dev *bdev, unsigned int *done); 1460 struct drbd_backing_dev *bdev, unsigned int *done);
1614extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int);
1615extern void drbd_rs_controller_reset(struct drbd_conf *mdev); 1461extern void drbd_rs_controller_reset(struct drbd_conf *mdev);
1616 1462
1617static inline void ov_oos_print(struct drbd_conf *mdev) 1463static inline void ov_out_of_sync_print(struct drbd_conf *mdev)
1618{ 1464{
1619 if (mdev->ov_last_oos_size) { 1465 if (mdev->ov_last_oos_size) {
1620 dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n", 1466 dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n",
@@ -1626,97 +1472,102 @@ static inline void ov_oos_print(struct drbd_conf *mdev)
1626 1472
1627 1473
1628extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); 1474extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio *, void *);
1629extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, struct drbd_epoch_entry *, void *); 1475extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *,
1476 struct drbd_peer_request *, void *);
1630/* worker callbacks */ 1477/* worker callbacks */
1631extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int); 1478extern int w_e_end_data_req(struct drbd_work *, int);
1632extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int); 1479extern int w_e_end_rsdata_req(struct drbd_work *, int);
1633extern int w_e_end_data_req(struct drbd_conf *, struct drbd_work *, int); 1480extern int w_e_end_csum_rs_req(struct drbd_work *, int);
1634extern int w_e_end_rsdata_req(struct drbd_conf *, struct drbd_work *, int); 1481extern int w_e_end_ov_reply(struct drbd_work *, int);
1635extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int); 1482extern int w_e_end_ov_req(struct drbd_work *, int);
1636extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int); 1483extern int w_ov_finished(struct drbd_work *, int);
1637extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); 1484extern int w_resync_timer(struct drbd_work *, int);
1638extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); 1485extern int w_send_write_hint(struct drbd_work *, int);
1639extern int w_resync_timer(struct drbd_conf *, struct drbd_work *, int); 1486extern int w_make_resync_request(struct drbd_work *, int);
1640extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); 1487extern int w_send_dblock(struct drbd_work *, int);
1641extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); 1488extern int w_send_read_req(struct drbd_work *, int);
1642extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); 1489extern int w_prev_work_done(struct drbd_work *, int);
1643extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int); 1490extern int w_e_reissue(struct drbd_work *, int);
1644extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int); 1491extern int w_restart_disk_io(struct drbd_work *, int);
1645extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int); 1492extern int w_send_out_of_sync(struct drbd_work *, int);
1646extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); 1493extern int w_start_resync(struct drbd_work *, int);
1647extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int);
1648extern int w_send_oos(struct drbd_conf *, struct drbd_work *, int);
1649extern int w_start_resync(struct drbd_conf *, struct drbd_work *, int);
1650 1494
1651extern void resync_timer_fn(unsigned long data); 1495extern void resync_timer_fn(unsigned long data);
1652extern void start_resync_timer_fn(unsigned long data); 1496extern void start_resync_timer_fn(unsigned long data);
1653 1497
1654/* drbd_receiver.c */ 1498/* drbd_receiver.c */
1655extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector); 1499extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector);
1656extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, 1500extern int drbd_submit_peer_request(struct drbd_conf *,
1657 const unsigned rw, const int fault_type); 1501 struct drbd_peer_request *, const unsigned,
1658extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); 1502 const int);
1659extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, 1503extern int drbd_free_peer_reqs(struct drbd_conf *, struct list_head *);
1660 u64 id, 1504extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_conf *, u64,
1661 sector_t sector, 1505 sector_t, unsigned int,
1662 unsigned int data_size, 1506 gfp_t) __must_hold(local);
1663 gfp_t gfp_mask) __must_hold(local); 1507extern void __drbd_free_peer_req(struct drbd_conf *, struct drbd_peer_request *,
1664extern void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, 1508 int);
1665 int is_net); 1509#define drbd_free_peer_req(m,e) __drbd_free_peer_req(m, e, 0)
1666#define drbd_free_ee(m,e) drbd_free_some_ee(m, e, 0) 1510#define drbd_free_net_peer_req(m,e) __drbd_free_peer_req(m, e, 1)
1667#define drbd_free_net_ee(m,e) drbd_free_some_ee(m, e, 1) 1511extern struct page *drbd_alloc_pages(struct drbd_conf *, unsigned int, bool);
1668extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
1669 struct list_head *head);
1670extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
1671 struct list_head *head);
1672extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); 1512extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled);
1673extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); 1513extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed);
1674extern void drbd_flush_workqueue(struct drbd_conf *mdev); 1514extern void conn_flush_workqueue(struct drbd_tconn *tconn);
1675extern void drbd_free_tl_hash(struct drbd_conf *mdev); 1515extern int drbd_connected(struct drbd_conf *mdev);
1516static inline void drbd_flush_workqueue(struct drbd_conf *mdev)
1517{
1518 conn_flush_workqueue(mdev->tconn);
1519}
1676 1520
1677/* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to 1521/* Yes, there is kernel_setsockopt, but only since 2.6.18.
1678 * mess with get_fs/set_fs, we know we are KERNEL_DS always. */ 1522 * So we have our own copy of it here. */
1679static inline int drbd_setsockopt(struct socket *sock, int level, int optname, 1523static inline int drbd_setsockopt(struct socket *sock, int level, int optname,
1680 char __user *optval, int optlen) 1524 char *optval, int optlen)
1681{ 1525{
1526 mm_segment_t oldfs = get_fs();
1527 char __user *uoptval;
1682 int err; 1528 int err;
1529
1530 uoptval = (char __user __force *)optval;
1531
1532 set_fs(KERNEL_DS);
1683 if (level == SOL_SOCKET) 1533 if (level == SOL_SOCKET)
1684 err = sock_setsockopt(sock, level, optname, optval, optlen); 1534 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
1685 else 1535 else
1686 err = sock->ops->setsockopt(sock, level, optname, optval, 1536 err = sock->ops->setsockopt(sock, level, optname, uoptval,
1687 optlen); 1537 optlen);
1538 set_fs(oldfs);
1688 return err; 1539 return err;
1689} 1540}
1690 1541
1691static inline void drbd_tcp_cork(struct socket *sock) 1542static inline void drbd_tcp_cork(struct socket *sock)
1692{ 1543{
1693 int __user val = 1; 1544 int val = 1;
1694 (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, 1545 (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK,
1695 (char __user *)&val, sizeof(val)); 1546 (char*)&val, sizeof(val));
1696} 1547}
1697 1548
1698static inline void drbd_tcp_uncork(struct socket *sock) 1549static inline void drbd_tcp_uncork(struct socket *sock)
1699{ 1550{
1700 int __user val = 0; 1551 int val = 0;
1701 (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, 1552 (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK,
1702 (char __user *)&val, sizeof(val)); 1553 (char*)&val, sizeof(val));
1703} 1554}
1704 1555
1705static inline void drbd_tcp_nodelay(struct socket *sock) 1556static inline void drbd_tcp_nodelay(struct socket *sock)
1706{ 1557{
1707 int __user val = 1; 1558 int val = 1;
1708 (void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY, 1559 (void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY,
1709 (char __user *)&val, sizeof(val)); 1560 (char*)&val, sizeof(val));
1710} 1561}
1711 1562
1712static inline void drbd_tcp_quickack(struct socket *sock) 1563static inline void drbd_tcp_quickack(struct socket *sock)
1713{ 1564{
1714 int __user val = 2; 1565 int val = 2;
1715 (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, 1566 (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
1716 (char __user *)&val, sizeof(val)); 1567 (char*)&val, sizeof(val));
1717} 1568}
1718 1569
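The cork/uncork pair is meant to bracket a burst of small sends so the stack
coalesces them into full TCP segments instead of trickling them out one by
one. A minimal usage sketch under that assumption (send_one_packet is a
placeholder, not a DRBD function):

	static void send_burst(struct socket *sock)
	{
		drbd_tcp_cork(sock);	/* hold back partial frames */
		/* ... send_one_packet(sock) as often as needed ... */
		drbd_tcp_uncork(sock);	/* push out whatever is queued */
	}
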
1719void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo); 1570void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo);
1720 1571
1721/* drbd_proc.c */ 1572/* drbd_proc.c */
1722extern struct proc_dir_entry *drbd_proc; 1573extern struct proc_dir_entry *drbd_proc;
@@ -1725,8 +1576,8 @@ extern const char *drbd_conn_str(enum drbd_conns s);
1725extern const char *drbd_role_str(enum drbd_role s); 1576extern const char *drbd_role_str(enum drbd_role s);
1726 1577
1727/* drbd_actlog.c */ 1578/* drbd_actlog.c */
1728extern void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector); 1579extern void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i);
1729extern void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector); 1580extern void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i);
1730extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector); 1581extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector);
1731extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector); 1582extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector);
1732extern int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector); 1583extern int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector);
@@ -1734,7 +1585,6 @@ extern void drbd_rs_cancel_all(struct drbd_conf *mdev);
1734extern int drbd_rs_del_all(struct drbd_conf *mdev); 1585extern int drbd_rs_del_all(struct drbd_conf *mdev);
1735extern void drbd_rs_failed_io(struct drbd_conf *mdev, 1586extern void drbd_rs_failed_io(struct drbd_conf *mdev,
1736 sector_t sector, int size); 1587 sector_t sector, int size);
1737extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *);
1738extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go); 1588extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go);
1739extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, 1589extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector,
1740 int size, const char *file, const unsigned int line); 1590 int size, const char *file, const unsigned int line);
@@ -1744,73 +1594,24 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
1744 int size, const char *file, const unsigned int line); 1594 int size, const char *file, const unsigned int line);
1745#define drbd_set_out_of_sync(mdev, sector, size) \ 1595#define drbd_set_out_of_sync(mdev, sector, size) \
1746 __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) 1596 __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
1747extern void drbd_al_apply_to_bm(struct drbd_conf *mdev);
1748extern void drbd_al_shrink(struct drbd_conf *mdev); 1597extern void drbd_al_shrink(struct drbd_conf *mdev);
1749 1598
1750
1751/* drbd_nl.c */ 1599/* drbd_nl.c */
1752 1600/* state info broadcast */
1753void drbd_nl_cleanup(void); 1601struct sib_info {
1754int __init drbd_nl_init(void); 1602 enum drbd_state_info_bcast_reason sib_reason;
1755void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state); 1603 union {
1756void drbd_bcast_sync_progress(struct drbd_conf *mdev); 1604 struct {
1757void drbd_bcast_ee(struct drbd_conf *mdev, 1605 char *helper_name;
1758 const char *reason, const int dgs, 1606 unsigned helper_exit_code;
1759 const char* seen_hash, const char* calc_hash, 1607 };
1760 const struct drbd_epoch_entry* e); 1608 struct {
1761 1609 union drbd_state os;
1762 1610 union drbd_state ns;
1763/** 1611 };
1764 * DOC: DRBD State macros 1612 };
1765 * 1613};
1766 * These macros are used to express state changes in easily readable form. 1614void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib);
1767 *
1768 * The NS macros expand to a mask and a value, that can be bit ored onto the
1769 * current state as soon as the spinlock (req_lock) was taken.
1770 *
1771 * The _NS macros are used for state functions that get called with the
1772 * spinlock. These macros expand directly to the new state value.
1773 *
1774 * Besides the basic forms NS() and _NS() additional _?NS[23] are defined
1775 * to express state changes that affect more than one aspect of the state.
1776 *
1777 * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY)
1778 * Means that the network connection was established and that the peer
1779 * is in secondary role.
1780 */
1781#define role_MASK R_MASK
1782#define peer_MASK R_MASK
1783#define disk_MASK D_MASK
1784#define pdsk_MASK D_MASK
1785#define conn_MASK C_MASK
1786#define susp_MASK 1
1787#define user_isp_MASK 1
1788#define aftr_isp_MASK 1
1789#define susp_nod_MASK 1
1790#define susp_fen_MASK 1
1791
1792#define NS(T, S) \
1793 ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
1794 ({ union drbd_state val; val.i = 0; val.T = (S); val; })
1795#define NS2(T1, S1, T2, S2) \
1796 ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
1797 mask.T2 = T2##_MASK; mask; }), \
1798 ({ union drbd_state val; val.i = 0; val.T1 = (S1); \
1799 val.T2 = (S2); val; })
1800#define NS3(T1, S1, T2, S2, T3, S3) \
1801 ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
1802 mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \
1803 ({ union drbd_state val; val.i = 0; val.T1 = (S1); \
1804 val.T2 = (S2); val.T3 = (S3); val; })
1805
1806#define _NS(D, T, S) \
1807 D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; })
1808#define _NS2(D, T1, S1, T2, S2) \
1809 D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \
1810 __ns.T2 = (S2); __ns; })
1811#define _NS3(D, T1, S1, T2, S2, T3, S3) \
1812 D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \
1813 __ns.T2 = (S2); __ns.T3 = (S3); __ns; })
1814 1615
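For readers tracking the removal: the NS*/_NS* macros were used by combining a
mask/value pair for a graceful request with a precomputed full state for
callers already holding the spinlock, e.g.:

	/* graceful request: only the conn field is masked and changed */
	drbd_request_state(mdev, NS(conn, C_DISCONNECTING));

	/* under req_lock: full new state with disk forced to D_FAILED */
	_drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
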
1815/* 1616/*
1816 * inline helper functions 1617 * inline helper functions
@@ -1827,9 +1628,10 @@ static inline struct page *page_chain_next(struct page *page)
1827#define page_chain_for_each_safe(page, n) \ 1628#define page_chain_for_each_safe(page, n) \
1828 for (; page && ({ n = page_chain_next(page); 1; }); page = n) 1629 for (; page && ({ n = page_chain_next(page); 1; }); page = n)
1829 1630
1830static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e) 1631
1632static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_req)
1831{ 1633{
1832 struct page *page = e->pages; 1634 struct page *page = peer_req->pages;
1833 page_chain_for_each(page) { 1635 page_chain_for_each(page) {
1834 if (page_count(page) > 1) 1636 if (page_count(page) > 1)
1835 return 1; 1637 return 1;
@@ -1837,18 +1639,6 @@ static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e)
1837 return 0; 1639 return 0;
1838} 1640}
1839 1641
1840static inline void drbd_state_lock(struct drbd_conf *mdev)
1841{
1842 wait_event(mdev->misc_wait,
1843 !drbd_test_and_set_flag(mdev, CLUSTER_ST_CHANGE));
1844}
1845
1846static inline void drbd_state_unlock(struct drbd_conf *mdev)
1847{
1848 drbd_clear_flag(mdev, CLUSTER_ST_CHANGE);
1849 wake_up(&mdev->misc_wait);
1850}
1851
1852static inline enum drbd_state_rv 1642static inline enum drbd_state_rv
1853_drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, 1643_drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
1854 enum chg_state_flags flags, struct completion *done) 1644 enum chg_state_flags flags, struct completion *done)
@@ -1862,21 +1652,16 @@ _drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
1862 return rv; 1652 return rv;
1863} 1653}
1864 1654
1865/** 1655static inline union drbd_state drbd_read_state(struct drbd_conf *mdev)
1866 * drbd_request_state() - Request a state change
1867 * @mdev: DRBD device.
1868 * @mask: mask of state bits to change.
1869 * @val: value of new state bits.
1870 *
1871 * This is the most graceful way of requesting a state change. It is
1872 * quite verbose in case the state change is not possible, and all those
1873 * state changes are globally serialized.
1874 */
1875static inline int drbd_request_state(struct drbd_conf *mdev,
1876 union drbd_state mask,
1877 union drbd_state val)
1878{ 1656{
1879 return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); 1657 union drbd_state rv;
1658
1659 rv.i = mdev->state.i;
1660 rv.susp = mdev->tconn->susp;
1661 rv.susp_nod = mdev->tconn->susp_nod;
1662 rv.susp_fen = mdev->tconn->susp_fen;
1663
1664 return rv;
1880} 1665}
1881 1666
1882enum drbd_force_detach_flags { 1667enum drbd_force_detach_flags {
@@ -1891,8 +1676,13 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev,
1891 enum drbd_force_detach_flags df, 1676 enum drbd_force_detach_flags df,
1892 const char *where) 1677 const char *where)
1893{ 1678{
1894 switch (mdev->ldev->dc.on_io_error) { 1679 enum drbd_io_error_p ep;
1895 case EP_PASS_ON: 1680
1681 rcu_read_lock();
1682 ep = rcu_dereference(mdev->ldev->disk_conf)->on_io_error;
1683 rcu_read_unlock();
1684 switch (ep) {
1685 case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */
1896 if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) { 1686 if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) {
1897 if (__ratelimit(&drbd_ratelimit_state)) 1687 if (__ratelimit(&drbd_ratelimit_state))
1898 dev_err(DEV, "Local IO failed in %s.\n", where); 1688 dev_err(DEV, "Local IO failed in %s.\n", where);
@@ -1923,11 +1713,11 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev,
1923 * we read meta data only once during attach, 1713 * we read meta data only once during attach,
1924 * which will fail in case of errors. 1714 * which will fail in case of errors.
1925 */ 1715 */
1926 drbd_set_flag(mdev, WAS_IO_ERROR); 1716 set_bit(WAS_IO_ERROR, &mdev->flags);
1927 if (df == DRBD_READ_ERROR) 1717 if (df == DRBD_READ_ERROR)
1928 drbd_set_flag(mdev, WAS_READ_ERROR); 1718 set_bit(WAS_READ_ERROR, &mdev->flags);
1929 if (df == DRBD_FORCE_DETACH) 1719 if (df == DRBD_FORCE_DETACH)
1930 drbd_set_flag(mdev, FORCE_DETACH); 1720 set_bit(FORCE_DETACH, &mdev->flags);
1931 if (mdev->state.disk > D_FAILED) { 1721 if (mdev->state.disk > D_FAILED) {
1932 _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); 1722 _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
1933 dev_err(DEV, 1723 dev_err(DEV,
@@ -1951,9 +1741,9 @@ static inline void drbd_chk_io_error_(struct drbd_conf *mdev,
1951{ 1741{
1952 if (error) { 1742 if (error) {
1953 unsigned long flags; 1743 unsigned long flags;
1954 spin_lock_irqsave(&mdev->req_lock, flags); 1744 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
1955 __drbd_chk_io_error_(mdev, forcedetach, where); 1745 __drbd_chk_io_error_(mdev, forcedetach, where);
1956 spin_unlock_irqrestore(&mdev->req_lock, flags); 1746 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
1957 } 1747 }
1958} 1748}
1959 1749
@@ -1965,9 +1755,9 @@ static inline void drbd_chk_io_error_(struct drbd_conf *mdev,
1965 * BTW, for internal meta data, this happens to be the maximum capacity 1755 * BTW, for internal meta data, this happens to be the maximum capacity
1966 * we could agree upon with our peer node. 1756 * we could agree upon with our peer node.
1967 */ 1757 */
1968static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev) 1758static inline sector_t _drbd_md_first_sector(int meta_dev_idx, struct drbd_backing_dev *bdev)
1969{ 1759{
1970 switch (bdev->dc.meta_dev_idx) { 1760 switch (meta_dev_idx) {
1971 case DRBD_MD_INDEX_INTERNAL: 1761 case DRBD_MD_INDEX_INTERNAL:
1972 case DRBD_MD_INDEX_FLEX_INT: 1762 case DRBD_MD_INDEX_FLEX_INT:
1973 return bdev->md.md_offset + bdev->md.bm_offset; 1763 return bdev->md.md_offset + bdev->md.bm_offset;
@@ -1977,13 +1767,30 @@ static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
1977 } 1767 }
1978} 1768}
1979 1769
1770static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
1771{
1772 int meta_dev_idx;
1773
1774 rcu_read_lock();
1775 meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
1776 rcu_read_unlock();
1777
1778 return _drbd_md_first_sector(meta_dev_idx, bdev);
1779}
1780
1980/** 1781/**
1981 * drbd_md_last_sector() - Return the last sector number of the meta data area 1782 * drbd_md_last_sector() - Return the last sector number of the meta data area
1982 * @bdev: Meta data block device. 1783 * @bdev: Meta data block device.
1983 */ 1784 */
1984static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev) 1785static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
1985{ 1786{
1986 switch (bdev->dc.meta_dev_idx) { 1787 int meta_dev_idx;
1788
1789 rcu_read_lock();
1790 meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
1791 rcu_read_unlock();
1792
1793 switch (meta_dev_idx) {
1987 case DRBD_MD_INDEX_INTERNAL: 1794 case DRBD_MD_INDEX_INTERNAL:
1988 case DRBD_MD_INDEX_FLEX_INT: 1795 case DRBD_MD_INDEX_FLEX_INT:
1989 return bdev->md.md_offset + MD_AL_OFFSET - 1; 1796 return bdev->md.md_offset + MD_AL_OFFSET - 1;
@@ -2011,12 +1818,18 @@ static inline sector_t drbd_get_capacity(struct block_device *bdev)
2011static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev) 1818static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
2012{ 1819{
2013 sector_t s; 1820 sector_t s;
2014 switch (bdev->dc.meta_dev_idx) { 1821 int meta_dev_idx;
1822
1823 rcu_read_lock();
1824 meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
1825 rcu_read_unlock();
1826
1827 switch (meta_dev_idx) {
2015 case DRBD_MD_INDEX_INTERNAL: 1828 case DRBD_MD_INDEX_INTERNAL:
2016 case DRBD_MD_INDEX_FLEX_INT: 1829 case DRBD_MD_INDEX_FLEX_INT:
2017 s = drbd_get_capacity(bdev->backing_bdev) 1830 s = drbd_get_capacity(bdev->backing_bdev)
2018 ? min_t(sector_t, DRBD_MAX_SECTORS_FLEX, 1831 ? min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
2019 drbd_md_first_sector(bdev)) 1832 _drbd_md_first_sector(meta_dev_idx, bdev))
2020 : 0; 1833 : 0;
2021 break; 1834 break;
2022 case DRBD_MD_INDEX_FLEX_EXT: 1835 case DRBD_MD_INDEX_FLEX_EXT:
@@ -2042,9 +1855,15 @@ static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
2042static inline sector_t drbd_md_ss__(struct drbd_conf *mdev, 1855static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
2043 struct drbd_backing_dev *bdev) 1856 struct drbd_backing_dev *bdev)
2044{ 1857{
2045 switch (bdev->dc.meta_dev_idx) { 1858 int meta_dev_idx;
1859
1860 rcu_read_lock();
1861 meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
1862 rcu_read_unlock();
1863
1864 switch (meta_dev_idx) {
2046 default: /* external, some index */ 1865 default: /* external, some index */
2047 return MD_RESERVED_SECT * bdev->dc.meta_dev_idx; 1866 return MD_RESERVED_SECT * meta_dev_idx;
2048 case DRBD_MD_INDEX_INTERNAL: 1867 case DRBD_MD_INDEX_INTERNAL:
2049 /* with drbd08, internal meta data is always "flexible" */ 1868 /* with drbd08, internal meta data is always "flexible" */
2050 case DRBD_MD_INDEX_FLEX_INT: 1869 case DRBD_MD_INDEX_FLEX_INT:
@@ -2070,9 +1889,8 @@ drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
2070 unsigned long flags; 1889 unsigned long flags;
2071 spin_lock_irqsave(&q->q_lock, flags); 1890 spin_lock_irqsave(&q->q_lock, flags);
2072 list_add(&w->list, &q->q); 1891 list_add(&w->list, &q->q);
2073 up(&q->s); /* within the spinlock,
2074 see comment near end of drbd_worker() */
2075 spin_unlock_irqrestore(&q->q_lock, flags); 1892 spin_unlock_irqrestore(&q->q_lock, flags);
1893 wake_up(&q->q_wait);
2076} 1894}
2077 1895
2078static inline void 1896static inline void
@@ -2081,41 +1899,35 @@ drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
2081 unsigned long flags; 1899 unsigned long flags;
2082 spin_lock_irqsave(&q->q_lock, flags); 1900 spin_lock_irqsave(&q->q_lock, flags);
2083 list_add_tail(&w->list, &q->q); 1901 list_add_tail(&w->list, &q->q);
2084 up(&q->s); /* within the spinlock,
2085 see comment near end of drbd_worker() */
2086 spin_unlock_irqrestore(&q->q_lock, flags); 1902 spin_unlock_irqrestore(&q->q_lock, flags);
1903 wake_up(&q->q_wait);
2087} 1904}
2088 1905
2089static inline void wake_asender(struct drbd_conf *mdev) 1906static inline void wake_asender(struct drbd_tconn *tconn)
2090{
2091 if (drbd_test_flag(mdev, SIGNAL_ASENDER))
2092 force_sig(DRBD_SIG, mdev->asender.task);
2093}
2094
2095static inline void request_ping(struct drbd_conf *mdev)
2096{ 1907{
2097 drbd_set_flag(mdev, SEND_PING); 1908 if (test_bit(SIGNAL_ASENDER, &tconn->flags))
2098 wake_asender(mdev); 1909 force_sig(DRBD_SIG, tconn->asender.task);
2099} 1910}
2100 1911
2101static inline int drbd_send_short_cmd(struct drbd_conf *mdev, 1912static inline void request_ping(struct drbd_tconn *tconn)
2102 enum drbd_packets cmd)
2103{ 1913{
2104 struct p_header80 h; 1914 set_bit(SEND_PING, &tconn->flags);
2105 return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); 1915 wake_asender(tconn);
2106} 1916}
2107 1917
2108static inline int drbd_send_ping(struct drbd_conf *mdev) 1918extern void *conn_prepare_command(struct drbd_tconn *, struct drbd_socket *);
2109{ 1919extern void *drbd_prepare_command(struct drbd_conf *, struct drbd_socket *);
2110 struct p_header80 h; 1920extern int conn_send_command(struct drbd_tconn *, struct drbd_socket *,
2111 return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h)); 1921 enum drbd_packet, unsigned int, void *,
2112} 1922 unsigned int);
1923extern int drbd_send_command(struct drbd_conf *, struct drbd_socket *,
1924 enum drbd_packet, unsigned int, void *,
1925 unsigned int);
2113 1926
2114static inline int drbd_send_ping_ack(struct drbd_conf *mdev) 1927extern int drbd_send_ping(struct drbd_tconn *tconn);
2115{ 1928extern int drbd_send_ping_ack(struct drbd_tconn *tconn);
2116 struct p_header80 h; 1929extern int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state);
2117 return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); 1930extern int conn_send_state_req(struct drbd_tconn *, union drbd_state, union drbd_state);
2118}
2119 1931
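The exported pair on the right replaces the open-coded inlines on the left:
callers first reserve the socket via *_prepare_command(), which returns the
payload buffer, then hand the command and payload sizes to *_send_command().
A hedged sketch of how a payload-less packet such as P_PING would flow through
this API (the meta socket field and the zero sizes are inferred from the
signatures above, not quoted from drbd_main.c):

	static int send_ping_sketch(struct drbd_tconn *tconn)
	{
		struct drbd_socket *sock = &tconn->meta;	/* assumed field */

		if (!conn_prepare_command(tconn, sock))
			return -EIO;
		/* a ping carries no command-specific data */
		return conn_send_command(tconn, sock, P_PING, 0, NULL, 0);
	}
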
2120static inline void drbd_thread_stop(struct drbd_thread *thi) 1932static inline void drbd_thread_stop(struct drbd_thread *thi)
2121{ 1933{
@@ -2137,21 +1949,21 @@ static inline void drbd_thread_restart_nowait(struct drbd_thread *thi)
2137 * or implicit barrier packets as necessary. 1949 * or implicit barrier packets as necessary.
2138 * increased: 1950 * increased:
2139 * w_send_barrier 1951 * w_send_barrier
2140 * _req_mod(req, queue_for_net_write or queue_for_net_read); 1952 * _req_mod(req, QUEUE_FOR_NET_WRITE or QUEUE_FOR_NET_READ);
2141 * it is much easier and equally valid to count what we queue for the 1953 * it is much easier and equally valid to count what we queue for the
2142 * worker, even before it actually was queued or sent. 1954 * worker, even before it actually was queued or sent.
2143 * (drbd_make_request_common; recovery path on read io-error) 1955 * (drbd_make_request_common; recovery path on read io-error)
2144 * decreased: 1956 * decreased:
2145 * got_BarrierAck (respective tl_clear, tl_clear_barrier) 1957 * got_BarrierAck (respective tl_clear, tl_clear_barrier)
2146 * _req_mod(req, data_received) 1958 * _req_mod(req, DATA_RECEIVED)
2147 * [from receive_DataReply] 1959 * [from receive_DataReply]
2148 * _req_mod(req, write_acked_by_peer or recv_acked_by_peer or neg_acked) 1960 * _req_mod(req, WRITE_ACKED_BY_PEER or RECV_ACKED_BY_PEER or NEG_ACKED)
2149 * [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)] 1961 * [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)]
2150 * for some reason it is NOT decreased in got_NegAck, 1962 * for some reason it is NOT decreased in got_NegAck,
2151 * but in the resulting cleanup code from report_params. 1963 * but in the resulting cleanup code from report_params.
2152 * we should try to remember the reason for that... 1964 * we should try to remember the reason for that...
2153 * _req_mod(req, send_failed or send_canceled) 1965 * _req_mod(req, SEND_FAILED or SEND_CANCELED)
2154 * _req_mod(req, connection_lost_while_pending) 1966 * _req_mod(req, CONNECTION_LOST_WHILE_PENDING)
2155 * [from tl_clear_barrier] 1967 * [from tl_clear_barrier]
2156 */ 1968 */
2157static inline void inc_ap_pending(struct drbd_conf *mdev) 1969static inline void inc_ap_pending(struct drbd_conf *mdev)
@@ -2159,17 +1971,19 @@ static inline void inc_ap_pending(struct drbd_conf *mdev)
2159 atomic_inc(&mdev->ap_pending_cnt); 1971 atomic_inc(&mdev->ap_pending_cnt);
2160} 1972}
2161 1973
2162#define ERR_IF_CNT_IS_NEGATIVE(which) \ 1974#define ERR_IF_CNT_IS_NEGATIVE(which, func, line) \
2163 if (atomic_read(&mdev->which) < 0) \ 1975 if (atomic_read(&mdev->which) < 0) \
2164 dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n", \ 1976 dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n", \
2165 __func__ , __LINE__ , \ 1977 func, line, \
2166 atomic_read(&mdev->which)) 1978 atomic_read(&mdev->which))
2167 1979
2168#define dec_ap_pending(mdev) do { \ 1980#define dec_ap_pending(mdev) _dec_ap_pending(mdev, __FUNCTION__, __LINE__)
2169 typecheck(struct drbd_conf *, mdev); \ 1981static inline void _dec_ap_pending(struct drbd_conf *mdev, const char *func, int line)
2170 if (atomic_dec_and_test(&mdev->ap_pending_cnt)) \ 1982{
2171 wake_up(&mdev->misc_wait); \ 1983 if (atomic_dec_and_test(&mdev->ap_pending_cnt))
2172 ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt); } while (0) 1984 wake_up(&mdev->misc_wait);
1985 ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt, func, line);
1986}
2173 1987
2174/* counts how many resync-related answers we still expect from the peer 1988/* counts how many resync-related answers we still expect from the peer
2175 * increase decrease 1989 * increase decrease
@@ -2182,10 +1996,12 @@ static inline void inc_rs_pending(struct drbd_conf *mdev)
2182 atomic_inc(&mdev->rs_pending_cnt); 1996 atomic_inc(&mdev->rs_pending_cnt);
2183} 1997}
2184 1998
2185#define dec_rs_pending(mdev) do { \ 1999#define dec_rs_pending(mdev) _dec_rs_pending(mdev, __FUNCTION__, __LINE__)
2186 typecheck(struct drbd_conf *, mdev); \ 2000static inline void _dec_rs_pending(struct drbd_conf *mdev, const char *func, int line)
2187 atomic_dec(&mdev->rs_pending_cnt); \ 2001{
2188 ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt); } while (0) 2002 atomic_dec(&mdev->rs_pending_cnt);
2003 ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt, func, line);
2004}
2189 2005
2190/* counts how many answers we still need to send to the peer. 2006/* counts how many answers we still need to send to the peer.
2191 * increased on 2007 * increased on
@@ -2201,38 +2017,18 @@ static inline void inc_unacked(struct drbd_conf *mdev)
2201 atomic_inc(&mdev->unacked_cnt); 2017 atomic_inc(&mdev->unacked_cnt);
2202} 2018}
2203 2019
2204#define dec_unacked(mdev) do { \ 2020#define dec_unacked(mdev) _dec_unacked(mdev, __FUNCTION__, __LINE__)
2205 typecheck(struct drbd_conf *, mdev); \ 2021static inline void _dec_unacked(struct drbd_conf *mdev, const char *func, int line)
2206 atomic_dec(&mdev->unacked_cnt); \
2207 ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0)
2208
2209#define sub_unacked(mdev, n) do { \
2210 typecheck(struct drbd_conf *, mdev); \
2211 atomic_sub(n, &mdev->unacked_cnt); \
2212 ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0)
2213
2214
2215static inline void put_net_conf(struct drbd_conf *mdev)
2216{ 2022{
2217 if (atomic_dec_and_test(&mdev->net_cnt)) 2023 atomic_dec(&mdev->unacked_cnt);
2218 wake_up(&mdev->net_cnt_wait); 2024 ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
2219} 2025}
2220 2026
2221/** 2027#define sub_unacked(mdev, n) _sub_unacked(mdev, n, __FUNCTION__, __LINE__)
2222 * get_net_conf() - Increase ref count on mdev->net_conf; Returns 0 if nothing there 2028static inline void _sub_unacked(struct drbd_conf *mdev, int n, const char *func, int line)
2223 * @mdev: DRBD device.
2224 *
2225 * You have to call put_net_conf() when finished working with mdev->net_conf.
2226 */
2227static inline int get_net_conf(struct drbd_conf *mdev)
2228{ 2029{
2229 int have_net_conf; 2030 atomic_sub(n, &mdev->unacked_cnt);
2230 2031 ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
2231 atomic_inc(&mdev->net_cnt);
2232 have_net_conf = mdev->state.conn >= C_UNCONNECTED;
2233 if (!have_net_conf)
2234 put_net_conf(mdev);
2235 return have_net_conf;
2236} 2032}
2237 2033
2238/** 2034/**
@@ -2336,17 +2132,20 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev,
2336 * maybe re-implement using semaphores? */ 2132 * maybe re-implement using semaphores? */
2337static inline int drbd_get_max_buffers(struct drbd_conf *mdev) 2133static inline int drbd_get_max_buffers(struct drbd_conf *mdev)
2338{ 2134{
2339 int mxb = 1000000; /* arbitrary limit on open requests */ 2135 struct net_conf *nc;
2340 if (get_net_conf(mdev)) { 2136 int mxb;
2341 mxb = mdev->net_conf->max_buffers; 2137
2342 put_net_conf(mdev); 2138 rcu_read_lock();
2343 } 2139 nc = rcu_dereference(mdev->tconn->net_conf);
2140 mxb = nc ? nc->max_buffers : 1000000; /* arbitrary limit on open requests */
2141 rcu_read_unlock();
2142
2344 return mxb; 2143 return mxb;
2345} 2144}
2346 2145
2347static inline int drbd_state_is_stable(struct drbd_conf *mdev) 2146static inline int drbd_state_is_stable(struct drbd_conf *mdev)
2348{ 2147{
2349 union drbd_state s = mdev->state; 2148 union drbd_dev_state s = mdev->state;
2350 2149
2351 /* DO NOT add a default clause, we want the compiler to warn us 2150 /* DO NOT add a default clause, we want the compiler to warn us
2352 * for any newly introduced state we may have forgotten to add here */ 2151 * for any newly introduced state we may have forgotten to add here */
@@ -2380,7 +2179,7 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev)
2380 2179
2381 /* Allow IO in BM exchange states with new protocols */ 2180 /* Allow IO in BM exchange states with new protocols */
2382 case C_WF_BITMAP_S: 2181 case C_WF_BITMAP_S:
2383 if (mdev->agreed_pro_version < 96) 2182 if (mdev->tconn->agreed_pro_version < 96)
2384 return 0; 2183 return 0;
2385 break; 2184 break;
2386 2185
@@ -2402,7 +2201,7 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev)
2402 /* disk state is stable as well. */ 2201 /* disk state is stable as well. */
2403 break; 2202 break;
2404 2203
2405 /* no new io accepted during tansitional states */ 2204 /* no new io accepted during transitional states */
2406 case D_ATTACHING: 2205 case D_ATTACHING:
2407 case D_NEGOTIATING: 2206 case D_NEGOTIATING:
2408 case D_UNKNOWN: 2207 case D_UNKNOWN:
@@ -2414,18 +2213,20 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev)
2414 return 1; 2213 return 1;
2415} 2214}
2416 2215
2417static inline int is_susp(union drbd_state s) 2216static inline int drbd_suspended(struct drbd_conf *mdev)
2418{ 2217{
2419 return s.susp || s.susp_nod || s.susp_fen; 2218 struct drbd_tconn *tconn = mdev->tconn;
2219
2220 return tconn->susp || tconn->susp_fen || tconn->susp_nod;
2420} 2221}
2421 2222
2422static inline bool may_inc_ap_bio(struct drbd_conf *mdev) 2223static inline bool may_inc_ap_bio(struct drbd_conf *mdev)
2423{ 2224{
2424 int mxb = drbd_get_max_buffers(mdev); 2225 int mxb = drbd_get_max_buffers(mdev);
2425 2226
2426 if (is_susp(mdev->state)) 2227 if (drbd_suspended(mdev))
2427 return false; 2228 return false;
2428 if (drbd_test_flag(mdev, SUSPEND_IO)) 2229 if (test_bit(SUSPEND_IO, &mdev->flags))
2429 return false; 2230 return false;
2430 2231
2431 /* to avoid potential deadlock or bitmap corruption, 2232 /* to avoid potential deadlock or bitmap corruption,
@@ -2440,35 +2241,35 @@ static inline bool may_inc_ap_bio(struct drbd_conf *mdev)
2440 * and we are within the spinlock anyways, we have this workaround. */ 2241 * and we are within the spinlock anyways, we have this workaround. */
2441 if (atomic_read(&mdev->ap_bio_cnt) > mxb) 2242 if (atomic_read(&mdev->ap_bio_cnt) > mxb)
2442 return false; 2243 return false;
2443 if (drbd_test_flag(mdev, BITMAP_IO)) 2244 if (test_bit(BITMAP_IO, &mdev->flags))
2444 return false; 2245 return false;
2445 return true; 2246 return true;
2446} 2247}
2447 2248
2448static inline bool inc_ap_bio_cond(struct drbd_conf *mdev, int count) 2249static inline bool inc_ap_bio_cond(struct drbd_conf *mdev)
2449{ 2250{
2450 bool rv = false; 2251 bool rv = false;
2451 2252
2452 spin_lock_irq(&mdev->req_lock); 2253 spin_lock_irq(&mdev->tconn->req_lock);
2453 rv = may_inc_ap_bio(mdev); 2254 rv = may_inc_ap_bio(mdev);
2454 if (rv) 2255 if (rv)
2455 atomic_add(count, &mdev->ap_bio_cnt); 2256 atomic_inc(&mdev->ap_bio_cnt);
2456 spin_unlock_irq(&mdev->req_lock); 2257 spin_unlock_irq(&mdev->tconn->req_lock);
2457 2258
2458 return rv; 2259 return rv;
2459} 2260}
2460 2261
2461static inline void inc_ap_bio(struct drbd_conf *mdev, int count) 2262static inline void inc_ap_bio(struct drbd_conf *mdev)
2462{ 2263{
2463 /* we wait here 2264 /* we wait here
2464 * as long as the device is suspended 2265 * as long as the device is suspended
2465 * until the bitmap is no longer on the fly during connection 2266 * until the bitmap is no longer on the fly during connection
2466 * handshake as long as we would exeed the max_buffer limit. 2267 * handshake as long as we would exceed the max_buffer limit.
2467 * 2268 *
2468 * to avoid races with the reconnect code, 2269 * to avoid races with the reconnect code,
2469 * we need to atomic_inc within the spinlock. */ 2270 * we need to atomic_inc within the spinlock. */
2470 2271
2471 wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev, count)); 2272 wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev));
2472} 2273}
2473 2274
2474static inline void dec_ap_bio(struct drbd_conf *mdev) 2275static inline void dec_ap_bio(struct drbd_conf *mdev)
@@ -2478,9 +2279,9 @@ static inline void dec_ap_bio(struct drbd_conf *mdev)
2478 2279
2479 D_ASSERT(ap_bio >= 0); 2280 D_ASSERT(ap_bio >= 0);
2480 2281
2481 if (ap_bio == 0 && drbd_test_flag(mdev, BITMAP_IO)) { 2282 if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
2482 if (!drbd_test_and_set_flag(mdev, BITMAP_IO_QUEUED)) 2283 if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
2483 drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); 2284 drbd_queue_work(&mdev->tconn->sender_work, &mdev->bm_io_work.w);
2484 } 2285 }
2485 2286
2486 /* this currently does wake_up for every dec_ap_bio! 2287 /* this currently does wake_up for every dec_ap_bio!
@@ -2490,6 +2291,12 @@ static inline void dec_ap_bio(struct drbd_conf *mdev)
2490 wake_up(&mdev->misc_wait); 2291 wake_up(&mdev->misc_wait);
2491} 2292}
2492 2293
2294static inline bool verify_can_do_stop_sector(struct drbd_conf *mdev)
2295{
2296 return mdev->tconn->agreed_pro_version >= 97 &&
2297 mdev->tconn->agreed_pro_version != 100;
2298}
2299
2493static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) 2300static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
2494{ 2301{
2495 int changed = mdev->ed_uuid != val; 2302 int changed = mdev->ed_uuid != val;
@@ -2497,40 +2304,6 @@ static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
2497 return changed; 2304 return changed;
2498} 2305}
2499 2306
2500static inline int seq_cmp(u32 a, u32 b)
2501{
2502 /* we assume wrap around at 32bit.
2503 * for wrap around at 24bit (old atomic_t),
2504 * we'd have to
2505 * a <<= 8; b <<= 8;
2506 */
2507 return (s32)(a) - (s32)(b);
2508}
2509#define seq_lt(a, b) (seq_cmp((a), (b)) < 0)
2510#define seq_gt(a, b) (seq_cmp((a), (b)) > 0)
2511#define seq_ge(a, b) (seq_cmp((a), (b)) >= 0)
2512#define seq_le(a, b) (seq_cmp((a), (b)) <= 0)
2513/* CAUTION: please no side effects in arguments! */
2514#define seq_max(a, b) ((u32)(seq_gt((a), (b)) ? (a) : (b)))
2515
2516static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq)
2517{
2518 unsigned int m;
2519 spin_lock(&mdev->peer_seq_lock);
2520 m = seq_max(mdev->peer_seq, new_seq);
2521 mdev->peer_seq = m;
2522 spin_unlock(&mdev->peer_seq_lock);
2523 if (m == new_seq)
2524 wake_up(&mdev->seq_wait);
2525}
2526
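The removed seq_cmp leans on two's-complement wraparound: casting both 32-bit
sequence numbers to signed before subtracting makes a value just past the wrap
compare as newer than one just before it. A standalone illustration with
userspace fixed-width types:

	#include <stdint.h>

	static int32_t seq_cmp(uint32_t a, uint32_t b)
	{
		return (int32_t)a - (int32_t)b;
	}

	/* 0x00000001 is numerically tiny, but as a wrapped sequence number it
	 * sits two steps after 0xFFFFFFFF:
	 *   (int32_t)0x00000001 - (int32_t)0xFFFFFFFF == 1 - (-1) == 2 > 0 */
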
2527static inline void drbd_update_congested(struct drbd_conf *mdev)
2528{
2529 struct sock *sk = mdev->data.socket->sk;
2530 if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5)
2531 drbd_set_flag(mdev, NET_CONGESTED);
2532}
2533
2534static inline int drbd_queue_order_type(struct drbd_conf *mdev) 2307static inline int drbd_queue_order_type(struct drbd_conf *mdev)
2535{ 2308{
2536 /* sorry, we currently have no working implementation 2309 /* sorry, we currently have no working implementation
@@ -2545,15 +2318,46 @@ static inline void drbd_md_flush(struct drbd_conf *mdev)
2545{ 2318{
2546 int r; 2319 int r;
2547 2320
2548 if (drbd_test_flag(mdev, MD_NO_FUA)) 2321 if (mdev->ldev == NULL) {
2322 dev_warn(DEV, "mdev->ldev == NULL in drbd_md_flush\n");
2323 return;
2324 }
2325
2326 if (test_bit(MD_NO_FUA, &mdev->flags))
2549 return; 2327 return;
2550 2328
2551 r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_NOIO, NULL); 2329 r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_NOIO, NULL);
2552 if (r) { 2330 if (r) {
2553 drbd_set_flag(mdev, MD_NO_FUA); 2331 set_bit(MD_NO_FUA, &mdev->flags);
2554 dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); 2332 dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
2555 } 2333 }
2556} 2334}
2557 2335
2558
2559#endif 2336#endif
2337
2338/* This is defined in drivers/md/md.h as well. Should go into wait.h */
2339#define __wait_event_lock_irq(wq, condition, lock, cmd) \
2340do { \
2341 wait_queue_t __wait; \
2342 init_waitqueue_entry(&__wait, current); \
2343 \
2344 add_wait_queue(&wq, &__wait); \
2345 for (;;) { \
2346 set_current_state(TASK_UNINTERRUPTIBLE); \
2347 if (condition) \
2348 break; \
2349 spin_unlock_irq(&lock); \
2350 cmd; \
2351 schedule(); \
2352 spin_lock_irq(&lock); \
2353 } \
2354 current->state = TASK_RUNNING; \
2355 remove_wait_queue(&wq, &__wait); \
2356} while (0)
2357
2358#define wait_event_lock_irq(wq, condition, lock, cmd) \
2359do { \
2360 if (condition) \
2361 break; \
2362 __wait_event_lock_irq(wq, condition, lock, cmd); \
2363} while (0)
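
A closing usage sketch for the wait helper above: it must be entered with the
spinlock held, drops it around schedule(), runs cmd on each iteration while
unlocked, and returns with the lock re-taken (the condition shown is
illustrative only; cmd is left empty here):

	spin_lock_irq(&mdev->tconn->req_lock);
	wait_event_lock_irq(mdev->misc_wait,
			    atomic_read(&mdev->ap_bio_cnt) == 0,
			    mdev->tconn->req_lock,
			    /* nothing extra to run while unlocked */);
	spin_unlock_irq(&mdev->tconn->req_lock);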