Diffstat (limited to 'drivers/block/drbd/drbd_receiver.c')
-rw-r--r--  drivers/block/drbd/drbd_receiver.c  |  3894
1 file changed, 2276 insertions(+), 1618 deletions(-)
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c74ca2df7431..a9eccfc6079b 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -48,17 +48,25 @@
48 48
49#include "drbd_vli.h" 49#include "drbd_vli.h"
50 50
51struct packet_info {
52 enum drbd_packet cmd;
53 unsigned int size;
54 unsigned int vnr;
55 void *data;
56};
57
51enum finish_epoch { 58enum finish_epoch {
52 FE_STILL_LIVE, 59 FE_STILL_LIVE,
53 FE_DESTROYED, 60 FE_DESTROYED,
54 FE_RECYCLED, 61 FE_RECYCLED,
55}; 62};
56 63
57static int drbd_do_handshake(struct drbd_conf *mdev); 64static int drbd_do_features(struct drbd_tconn *tconn);
58static int drbd_do_auth(struct drbd_conf *mdev); 65static int drbd_do_auth(struct drbd_tconn *tconn);
66static int drbd_disconnected(struct drbd_conf *mdev);
59 67
60static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); 68static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *, struct drbd_epoch *, enum epoch_event);
61static int e_end_block(struct drbd_conf *, struct drbd_work *, int); 69static int e_end_block(struct drbd_work *, int);
62 70
63 71
64#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) 72#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
@@ -142,11 +150,12 @@ static void page_chain_add(struct page **head,
142 *head = chain_first; 150 *head = chain_first;
143} 151}
144 152
145static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number) 153static struct page *__drbd_alloc_pages(struct drbd_conf *mdev,
154 unsigned int number)
146{ 155{
147 struct page *page = NULL; 156 struct page *page = NULL;
148 struct page *tmp = NULL; 157 struct page *tmp = NULL;
149 int i = 0; 158 unsigned int i = 0;
150 159
151 /* Yes, testing drbd_pp_vacant outside the lock is racy. 160 /* Yes, testing drbd_pp_vacant outside the lock is racy.
152 * So what. It saves a spin_lock. */ 161 * So what. It saves a spin_lock. */
@@ -175,7 +184,7 @@ static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int
175 return page; 184 return page;
176 185
177 /* Not enough pages immediately available this time. 186 /* Not enough pages immediately available this time.
178 * No need to jump around here, drbd_pp_alloc will retry this 187 * No need to jump around here, drbd_alloc_pages will retry this
179 * function "soon". */ 188 * function "soon". */
180 if (page) { 189 if (page) {
181 tmp = page_chain_tail(page, NULL); 190 tmp = page_chain_tail(page, NULL);
@@ -187,9 +196,10 @@ static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int
187 return NULL; 196 return NULL;
188} 197}
189 198
190static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed) 199static void reclaim_finished_net_peer_reqs(struct drbd_conf *mdev,
200 struct list_head *to_be_freed)
191{ 201{
192 struct drbd_epoch_entry *e; 202 struct drbd_peer_request *peer_req;
193 struct list_head *le, *tle; 203 struct list_head *le, *tle;
194 204
195 /* The EEs are always appended to the end of the list. Since 205 /* The EEs are always appended to the end of the list. Since
@@ -198,8 +208,8 @@ static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed
198 stop to examine the list... */ 208 stop to examine the list... */
199 209
200 list_for_each_safe(le, tle, &mdev->net_ee) { 210 list_for_each_safe(le, tle, &mdev->net_ee) {
201 e = list_entry(le, struct drbd_epoch_entry, w.list); 211 peer_req = list_entry(le, struct drbd_peer_request, w.list);
202 if (drbd_ee_has_active_page(e)) 212 if (drbd_peer_req_has_active_page(peer_req))
203 break; 213 break;
204 list_move(le, to_be_freed); 214 list_move(le, to_be_freed);
205 } 215 }
@@ -208,18 +218,18 @@ static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed
208static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) 218static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
209{ 219{
210 LIST_HEAD(reclaimed); 220 LIST_HEAD(reclaimed);
211 struct drbd_epoch_entry *e, *t; 221 struct drbd_peer_request *peer_req, *t;
212 222
213 spin_lock_irq(&mdev->req_lock); 223 spin_lock_irq(&mdev->tconn->req_lock);
214 reclaim_net_ee(mdev, &reclaimed); 224 reclaim_finished_net_peer_reqs(mdev, &reclaimed);
215 spin_unlock_irq(&mdev->req_lock); 225 spin_unlock_irq(&mdev->tconn->req_lock);
216 226
217 list_for_each_entry_safe(e, t, &reclaimed, w.list) 227 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
218 drbd_free_net_ee(mdev, e); 228 drbd_free_net_peer_req(mdev, peer_req);
219} 229}
220 230
221/** 231/**
222 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled) 232 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
223 * @mdev: DRBD device. 233 * @mdev: DRBD device.
224 * @number: number of pages requested 234 * @number: number of pages requested
225 * @retry: whether to retry, if not enough pages are available right now 235 * @retry: whether to retry, if not enough pages are available right now
@@ -230,23 +240,31 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
230 * 240 *
231 * Returns a page chain linked via page->private. 241 * Returns a page chain linked via page->private.
232 */ 242 */
233static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry) 243struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number,
244 bool retry)
234{ 245{
235 struct page *page = NULL; 246 struct page *page = NULL;
247 struct net_conf *nc;
236 DEFINE_WAIT(wait); 248 DEFINE_WAIT(wait);
249 int mxb;
237 250
238 /* Yes, we may run up to @number over max_buffers. If we 251 /* Yes, we may run up to @number over max_buffers. If we
239 * follow it strictly, the admin will get it wrong anyways. */ 252 * follow it strictly, the admin will get it wrong anyways. */
240 if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) 253 rcu_read_lock();
241 page = drbd_pp_first_pages_or_try_alloc(mdev, number); 254 nc = rcu_dereference(mdev->tconn->net_conf);
255 mxb = nc ? nc->max_buffers : 1000000;
256 rcu_read_unlock();
257
258 if (atomic_read(&mdev->pp_in_use) < mxb)
259 page = __drbd_alloc_pages(mdev, number);
242 260
243 while (page == NULL) { 261 while (page == NULL) {
244 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); 262 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
245 263
246 drbd_kick_lo_and_reclaim_net(mdev); 264 drbd_kick_lo_and_reclaim_net(mdev);
247 265
248 if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) { 266 if (atomic_read(&mdev->pp_in_use) < mxb) {
249 page = drbd_pp_first_pages_or_try_alloc(mdev, number); 267 page = __drbd_alloc_pages(mdev, number);
250 if (page) 268 if (page)
251 break; 269 break;
252 } 270 }
@@ -255,7 +273,7 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool
255 break; 273 break;
256 274
257 if (signal_pending(current)) { 275 if (signal_pending(current)) {
258 dev_warn(DEV, "drbd_pp_alloc interrupted!\n"); 276 dev_warn(DEV, "drbd_alloc_pages interrupted!\n");
259 break; 277 break;
260 } 278 }
261 279
@@ -268,11 +286,11 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool
268 return page; 286 return page;
269} 287}
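
[Illustration, not part of the patch] As the kernel-doc comment above notes, drbd_alloc_pages() hands back its pages as a chain linked via page->private rather than as an array. A minimal sketch of walking such a chain; count_chain_pages() is a hypothetical name used only here, and the chain link is written out directly via page_private():

        /*
         * Sketch only: traverse a drbd page chain. The next element is the
         * private field of the current page, cast back to a struct page.
         */
        #include <linux/mm.h>

        static unsigned int count_chain_pages(struct page *chain_first)
        {
                struct page *page;
                unsigned int n = 0;

                for (page = chain_first; page; page = (struct page *)page_private(page))
                        n++;

                return n;
        }
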
270 288
271/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc. 289/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
272 * Is also used from inside an other spin_lock_irq(&mdev->req_lock); 290 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
273 * Either links the page chain back to the global pool, 291 * Either links the page chain back to the global pool,
274 * or returns all pages to the system. */ 292 * or returns all pages to the system. */
275static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net) 293static void drbd_free_pages(struct drbd_conf *mdev, struct page *page, int is_net)
276{ 294{
277 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use; 295 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
278 int i; 296 int i;
@@ -280,7 +298,7 @@ static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
280 if (page == NULL) 298 if (page == NULL)
281 return; 299 return;
282 300
283 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count) 301 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
284 i = page_chain_free(page); 302 i = page_chain_free(page);
285 else { 303 else {
286 struct page *tmp; 304 struct page *tmp;
@@ -302,127 +320,130 @@ You need to hold the req_lock:
302 _drbd_wait_ee_list_empty() 320 _drbd_wait_ee_list_empty()
303 321
304You must not have the req_lock: 322You must not have the req_lock:
305 drbd_free_ee() 323 drbd_free_peer_req()
306 drbd_alloc_ee() 324 drbd_alloc_peer_req()
307 drbd_init_ee() 325 drbd_free_peer_reqs()
308 drbd_release_ee()
309 drbd_ee_fix_bhs() 326 drbd_ee_fix_bhs()
310 drbd_process_done_ee() 327 drbd_finish_peer_reqs()
311 drbd_clear_done_ee() 328 drbd_clear_done_ee()
312 drbd_wait_ee_list_empty() 329 drbd_wait_ee_list_empty()
313*/ 330*/
314 331
315struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, 332struct drbd_peer_request *
316 u64 id, 333drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector,
317 sector_t sector, 334 unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
318 unsigned int data_size,
319 gfp_t gfp_mask) __must_hold(local)
320{ 335{
321 struct drbd_epoch_entry *e; 336 struct drbd_peer_request *peer_req;
322 struct page *page = NULL; 337 struct page *page = NULL;
323 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; 338 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
324 339
325 if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE)) 340 if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
326 return NULL; 341 return NULL;
327 342
328 e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); 343 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
329 if (!e) { 344 if (!peer_req) {
330 if (!(gfp_mask & __GFP_NOWARN)) 345 if (!(gfp_mask & __GFP_NOWARN))
331 dev_err(DEV, "alloc_ee: Allocation of an EE failed\n"); 346 dev_err(DEV, "%s: allocation failed\n", __func__);
332 return NULL; 347 return NULL;
333 } 348 }
334 349
335 if (data_size) { 350 if (data_size) {
336 page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT)); 351 page = drbd_alloc_pages(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
337 if (!page) 352 if (!page)
338 goto fail; 353 goto fail;
339 } 354 }
340 355
341 INIT_HLIST_NODE(&e->collision); 356 drbd_clear_interval(&peer_req->i);
342 e->epoch = NULL; 357 peer_req->i.size = data_size;
343 e->mdev = mdev; 358 peer_req->i.sector = sector;
344 e->pages = page; 359 peer_req->i.local = false;
345 atomic_set(&e->pending_bios, 0); 360 peer_req->i.waiting = false;
346 e->size = data_size; 361
347 e->flags = 0; 362 peer_req->epoch = NULL;
348 e->sector = sector; 363 peer_req->w.mdev = mdev;
349 e->block_id = id; 364 peer_req->pages = page;
365 atomic_set(&peer_req->pending_bios, 0);
366 peer_req->flags = 0;
367 /*
368 * The block_id is opaque to the receiver. It is not endianness
369 * converted, and sent back to the sender unchanged.
370 */
371 peer_req->block_id = id;
350 372
351 return e; 373 return peer_req;
352 374
353 fail: 375 fail:
354 mempool_free(e, drbd_ee_mempool); 376 mempool_free(peer_req, drbd_ee_mempool);
355 return NULL; 377 return NULL;
356} 378}
357 379
358void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int is_net) 380void __drbd_free_peer_req(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
381 int is_net)
359{ 382{
360 if (e->flags & EE_HAS_DIGEST) 383 if (peer_req->flags & EE_HAS_DIGEST)
361 kfree(e->digest); 384 kfree(peer_req->digest);
362 drbd_pp_free(mdev, e->pages, is_net); 385 drbd_free_pages(mdev, peer_req->pages, is_net);
363 D_ASSERT(atomic_read(&e->pending_bios) == 0); 386 D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
364 D_ASSERT(hlist_unhashed(&e->collision)); 387 D_ASSERT(drbd_interval_empty(&peer_req->i));
365 mempool_free(e, drbd_ee_mempool); 388 mempool_free(peer_req, drbd_ee_mempool);
366} 389}
367 390
368int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) 391int drbd_free_peer_reqs(struct drbd_conf *mdev, struct list_head *list)
369{ 392{
370 LIST_HEAD(work_list); 393 LIST_HEAD(work_list);
371 struct drbd_epoch_entry *e, *t; 394 struct drbd_peer_request *peer_req, *t;
372 int count = 0; 395 int count = 0;
373 int is_net = list == &mdev->net_ee; 396 int is_net = list == &mdev->net_ee;
374 397
375 spin_lock_irq(&mdev->req_lock); 398 spin_lock_irq(&mdev->tconn->req_lock);
376 list_splice_init(list, &work_list); 399 list_splice_init(list, &work_list);
377 spin_unlock_irq(&mdev->req_lock); 400 spin_unlock_irq(&mdev->tconn->req_lock);
378 401
379 list_for_each_entry_safe(e, t, &work_list, w.list) { 402 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
380 drbd_free_some_ee(mdev, e, is_net); 403 __drbd_free_peer_req(mdev, peer_req, is_net);
381 count++; 404 count++;
382 } 405 }
383 return count; 406 return count;
384} 407}
385 408
386
387/* 409/*
388 * This function is called from _asender only_ 410 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
389 * but see also comments in _req_mod(,barrier_acked)
390 * and receive_Barrier.
391 *
392 * Move entries from net_ee to done_ee, if ready.
393 * Grab done_ee, call all callbacks, free the entries.
394 * The callbacks typically send out ACKs.
395 */ 411 */
396static int drbd_process_done_ee(struct drbd_conf *mdev) 412static int drbd_finish_peer_reqs(struct drbd_conf *mdev)
397{ 413{
398 LIST_HEAD(work_list); 414 LIST_HEAD(work_list);
399 LIST_HEAD(reclaimed); 415 LIST_HEAD(reclaimed);
400 struct drbd_epoch_entry *e, *t; 416 struct drbd_peer_request *peer_req, *t;
401 int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS); 417 int err = 0;
402 418
403 spin_lock_irq(&mdev->req_lock); 419 spin_lock_irq(&mdev->tconn->req_lock);
404 reclaim_net_ee(mdev, &reclaimed); 420 reclaim_finished_net_peer_reqs(mdev, &reclaimed);
405 list_splice_init(&mdev->done_ee, &work_list); 421 list_splice_init(&mdev->done_ee, &work_list);
406 spin_unlock_irq(&mdev->req_lock); 422 spin_unlock_irq(&mdev->tconn->req_lock);
407 423
408 list_for_each_entry_safe(e, t, &reclaimed, w.list) 424 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
409 drbd_free_net_ee(mdev, e); 425 drbd_free_net_peer_req(mdev, peer_req);
410 426
411 /* possible callbacks here: 427 /* possible callbacks here:
412 * e_end_block, and e_end_resync_block, e_send_discard_ack. 428 * e_end_block, and e_end_resync_block, e_send_superseded.
413 * all ignore the last argument. 429 * all ignore the last argument.
414 */ 430 */
415 list_for_each_entry_safe(e, t, &work_list, w.list) { 431 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
432 int err2;
433
416 /* list_del not necessary, next/prev members not touched */ 434 /* list_del not necessary, next/prev members not touched */
417 ok = e->w.cb(mdev, &e->w, !ok) && ok; 435 err2 = peer_req->w.cb(&peer_req->w, !!err);
418 drbd_free_ee(mdev, e); 436 if (!err)
437 err = err2;
438 drbd_free_peer_req(mdev, peer_req);
419 } 439 }
420 wake_up(&mdev->ee_wait); 440 wake_up(&mdev->ee_wait);
421 441
422 return ok; 442 return err;
423} 443}
424 444
425void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) 445static void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
446 struct list_head *head)
426{ 447{
427 DEFINE_WAIT(wait); 448 DEFINE_WAIT(wait);
428 449
@@ -430,55 +451,22 @@ void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
430 * and calling prepare_to_wait in the fast path */ 451 * and calling prepare_to_wait in the fast path */
431 while (!list_empty(head)) { 452 while (!list_empty(head)) {
432 prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE); 453 prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
433 spin_unlock_irq(&mdev->req_lock); 454 spin_unlock_irq(&mdev->tconn->req_lock);
434 io_schedule(); 455 io_schedule();
435 finish_wait(&mdev->ee_wait, &wait); 456 finish_wait(&mdev->ee_wait, &wait);
436 spin_lock_irq(&mdev->req_lock); 457 spin_lock_irq(&mdev->tconn->req_lock);
437 } 458 }
438} 459}
439 460
440void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) 461static void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
462 struct list_head *head)
441{ 463{
442 spin_lock_irq(&mdev->req_lock); 464 spin_lock_irq(&mdev->tconn->req_lock);
443 _drbd_wait_ee_list_empty(mdev, head); 465 _drbd_wait_ee_list_empty(mdev, head);
444 spin_unlock_irq(&mdev->req_lock); 466 spin_unlock_irq(&mdev->tconn->req_lock);
445}
446
447/* see also kernel_accept; which is only present since 2.6.18.
448 * also we want to log which part of it failed, exactly */
449static int drbd_accept(struct drbd_conf *mdev, const char **what,
450 struct socket *sock, struct socket **newsock)
451{
452 struct sock *sk = sock->sk;
453 int err = 0;
454
455 *what = "listen";
456 err = sock->ops->listen(sock, 5);
457 if (err < 0)
458 goto out;
459
460 *what = "sock_create_lite";
461 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
462 newsock);
463 if (err < 0)
464 goto out;
465
466 *what = "accept";
467 err = sock->ops->accept(sock, *newsock, 0);
468 if (err < 0) {
469 sock_release(*newsock);
470 *newsock = NULL;
471 goto out;
472 }
473 (*newsock)->ops = sock->ops;
474 __module_get((*newsock)->ops->owner);
475
476out:
477 return err;
478} 467}
479 468
480static int drbd_recv_short(struct drbd_conf *mdev, struct socket *sock, 469static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
481 void *buf, size_t size, int flags)
482{ 470{
483 mm_segment_t oldfs; 471 mm_segment_t oldfs;
484 struct kvec iov = { 472 struct kvec iov = {
@@ -500,59 +488,62 @@ static int drbd_recv_short(struct drbd_conf *mdev, struct socket *sock,
500 return rv; 488 return rv;
501} 489}
502 490
503static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) 491static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
504{ 492{
505 mm_segment_t oldfs;
506 struct kvec iov = {
507 .iov_base = buf,
508 .iov_len = size,
509 };
510 struct msghdr msg = {
511 .msg_iovlen = 1,
512 .msg_iov = (struct iovec *)&iov,
513 .msg_flags = MSG_WAITALL | MSG_NOSIGNAL
514 };
515 int rv; 493 int rv;
516 494
517 oldfs = get_fs(); 495 rv = drbd_recv_short(tconn->data.socket, buf, size, 0);
518 set_fs(KERNEL_DS);
519 496
520 for (;;) { 497 if (rv < 0) {
521 rv = sock_recvmsg(mdev->data.socket, &msg, size, msg.msg_flags); 498 if (rv == -ECONNRESET)
522 if (rv == size) 499 conn_info(tconn, "sock was reset by peer\n");
523 break; 500 else if (rv != -ERESTARTSYS)
501 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
502 } else if (rv == 0) {
503 if (test_bit(DISCONNECT_SENT, &tconn->flags)) {
504 long t;
505 rcu_read_lock();
506 t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10;
507 rcu_read_unlock();
524 508
525 /* Note: 509 t = wait_event_timeout(tconn->ping_wait, tconn->cstate < C_WF_REPORT_PARAMS, t);
526 * ECONNRESET other side closed the connection
527 * ERESTARTSYS (on sock) we got a signal
528 */
529 510
530 if (rv < 0) { 511 if (t)
531 if (rv == -ECONNRESET) 512 goto out;
532 dev_info(DEV, "sock was reset by peer\n");
533 else if (rv != -ERESTARTSYS)
534 dev_err(DEV, "sock_recvmsg returned %d\n", rv);
535 break;
536 } else if (rv == 0) {
537 dev_info(DEV, "sock was shut down by peer\n");
538 break;
539 } else {
540 /* signal came in, or peer/link went down,
541 * after we read a partial message
542 */
543 /* D_ASSERT(signal_pending(current)); */
544 break;
545 } 513 }
546 }; 514 conn_info(tconn, "sock was shut down by peer\n");
547 515 }
548 set_fs(oldfs);
549 516
550 if (rv != size) 517 if (rv != size)
551 drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE)); 518 conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
552 519
520out:
553 return rv; 521 return rv;
554} 522}
555 523
524static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size)
525{
526 int err;
527
528 err = drbd_recv(tconn, buf, size);
529 if (err != size) {
530 if (err >= 0)
531 err = -EIO;
532 } else
533 err = 0;
534 return err;
535}
536
537static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size)
538{
539 int err;
540
541 err = drbd_recv_all(tconn, buf, size);
542 if (err && !signal_pending(current))
543 conn_warn(tconn, "short read (expected size %d)\n", (int)size);
544 return err;
545}
546
556/* quoting tcp(7): 547/* quoting tcp(7):
557 * On individual connections, the socket buffer size must be set prior to the 548 * On individual connections, the socket buffer size must be set prior to the
558 * listen(2) or connect(2) calls in order to have it take effect. 549 * listen(2) or connect(2) calls in order to have it take effect.
@@ -572,29 +563,50 @@ static void drbd_setbufsize(struct socket *sock, unsigned int snd,
572 } 563 }
573} 564}
574 565
575static struct socket *drbd_try_connect(struct drbd_conf *mdev) 566static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
576{ 567{
577 const char *what; 568 const char *what;
578 struct socket *sock; 569 struct socket *sock;
579 struct sockaddr_in6 src_in6; 570 struct sockaddr_in6 src_in6;
580 int err; 571 struct sockaddr_in6 peer_in6;
572 struct net_conf *nc;
573 int err, peer_addr_len, my_addr_len;
574 int sndbuf_size, rcvbuf_size, connect_int;
581 int disconnect_on_error = 1; 575 int disconnect_on_error = 1;
582 576
583 if (!get_net_conf(mdev)) 577 rcu_read_lock();
578 nc = rcu_dereference(tconn->net_conf);
579 if (!nc) {
580 rcu_read_unlock();
584 return NULL; 581 return NULL;
582 }
583 sndbuf_size = nc->sndbuf_size;
584 rcvbuf_size = nc->rcvbuf_size;
585 connect_int = nc->connect_int;
586 rcu_read_unlock();
587
588 my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6));
589 memcpy(&src_in6, &tconn->my_addr, my_addr_len);
590
591 if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6)
592 src_in6.sin6_port = 0;
593 else
594 ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
595
596 peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(src_in6));
597 memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len);
585 598
586 what = "sock_create_kern"; 599 what = "sock_create_kern";
587 err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family, 600 err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
588 SOCK_STREAM, IPPROTO_TCP, &sock); 601 SOCK_STREAM, IPPROTO_TCP, &sock);
589 if (err < 0) { 602 if (err < 0) {
590 sock = NULL; 603 sock = NULL;
591 goto out; 604 goto out;
592 } 605 }
593 606
594 sock->sk->sk_rcvtimeo = 607 sock->sk->sk_rcvtimeo =
595 sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ; 608 sock->sk->sk_sndtimeo = connect_int * HZ;
596 drbd_setbufsize(sock, mdev->net_conf->sndbuf_size, 609 drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);
597 mdev->net_conf->rcvbuf_size);
598 610
599 /* explicitly bind to the configured IP as source IP 611 /* explicitly bind to the configured IP as source IP
600 * for the outgoing connections. 612 * for the outgoing connections.
@@ -603,17 +615,8 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev)
603 * Make sure to use 0 as port number, so linux selects 615 * Make sure to use 0 as port number, so linux selects
604 * a free one dynamically. 616 * a free one dynamically.
605 */ 617 */
606 memcpy(&src_in6, mdev->net_conf->my_addr,
607 min_t(int, mdev->net_conf->my_addr_len, sizeof(src_in6)));
608 if (((struct sockaddr *)mdev->net_conf->my_addr)->sa_family == AF_INET6)
609 src_in6.sin6_port = 0;
610 else
611 ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
612
613 what = "bind before connect"; 618 what = "bind before connect";
614 err = sock->ops->bind(sock, 619 err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
615 (struct sockaddr *) &src_in6,
616 mdev->net_conf->my_addr_len);
617 if (err < 0) 620 if (err < 0)
618 goto out; 621 goto out;
619 622
@@ -621,9 +624,7 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev)
621 * stay C_WF_CONNECTION, don't go Disconnecting! */ 624 * stay C_WF_CONNECTION, don't go Disconnecting! */
622 disconnect_on_error = 0; 625 disconnect_on_error = 0;
623 what = "connect"; 626 what = "connect";
624 err = sock->ops->connect(sock, 627 err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);
625 (struct sockaddr *)mdev->net_conf->peer_addr,
626 mdev->net_conf->peer_addr_len, 0);
627 628
628out: 629out:
629 if (err < 0) { 630 if (err < 0) {
@@ -641,91 +642,174 @@ out:
641 disconnect_on_error = 0; 642 disconnect_on_error = 0;
642 break; 643 break;
643 default: 644 default:
644 dev_err(DEV, "%s failed, err = %d\n", what, err); 645 conn_err(tconn, "%s failed, err = %d\n", what, err);
645 } 646 }
646 if (disconnect_on_error) 647 if (disconnect_on_error)
647 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 648 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
648 } 649 }
649 put_net_conf(mdev); 650
650 return sock; 651 return sock;
651} 652}
652 653
653static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) 654struct accept_wait_data {
655 struct drbd_tconn *tconn;
656 struct socket *s_listen;
657 struct completion door_bell;
658 void (*original_sk_state_change)(struct sock *sk);
659
660};
661
662static void drbd_incoming_connection(struct sock *sk)
654{ 663{
655 int timeo, err; 664 struct accept_wait_data *ad = sk->sk_user_data;
656 struct socket *s_estab = NULL, *s_listen; 665 void (*state_change)(struct sock *sk);
666
667 state_change = ad->original_sk_state_change;
668 if (sk->sk_state == TCP_ESTABLISHED)
669 complete(&ad->door_bell);
670 state_change(sk);
671}
672
673static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_data *ad)
674{
675 int err, sndbuf_size, rcvbuf_size, my_addr_len;
676 struct sockaddr_in6 my_addr;
677 struct socket *s_listen;
678 struct net_conf *nc;
657 const char *what; 679 const char *what;
658 680
659 if (!get_net_conf(mdev)) 681 rcu_read_lock();
660 return NULL; 682 nc = rcu_dereference(tconn->net_conf);
683 if (!nc) {
684 rcu_read_unlock();
685 return -EIO;
686 }
687 sndbuf_size = nc->sndbuf_size;
688 rcvbuf_size = nc->rcvbuf_size;
689 rcu_read_unlock();
690
691 my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6));
692 memcpy(&my_addr, &tconn->my_addr, my_addr_len);
661 693
662 what = "sock_create_kern"; 694 what = "sock_create_kern";
663 err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family, 695 err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
664 SOCK_STREAM, IPPROTO_TCP, &s_listen); 696 SOCK_STREAM, IPPROTO_TCP, &s_listen);
665 if (err) { 697 if (err) {
666 s_listen = NULL; 698 s_listen = NULL;
667 goto out; 699 goto out;
668 } 700 }
669 701
670 timeo = mdev->net_conf->try_connect_int * HZ; 702 s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
671 timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */ 703 drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
672
673 s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
674 s_listen->sk->sk_rcvtimeo = timeo;
675 s_listen->sk->sk_sndtimeo = timeo;
676 drbd_setbufsize(s_listen, mdev->net_conf->sndbuf_size,
677 mdev->net_conf->rcvbuf_size);
678 704
679 what = "bind before listen"; 705 what = "bind before listen";
680 err = s_listen->ops->bind(s_listen, 706 err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
681 (struct sockaddr *) mdev->net_conf->my_addr,
682 mdev->net_conf->my_addr_len);
683 if (err < 0) 707 if (err < 0)
684 goto out; 708 goto out;
685 709
686 err = drbd_accept(mdev, &what, s_listen, &s_estab); 710 ad->s_listen = s_listen;
711 write_lock_bh(&s_listen->sk->sk_callback_lock);
712 ad->original_sk_state_change = s_listen->sk->sk_state_change;
713 s_listen->sk->sk_state_change = drbd_incoming_connection;
714 s_listen->sk->sk_user_data = ad;
715 write_unlock_bh(&s_listen->sk->sk_callback_lock);
716
717 what = "listen";
718 err = s_listen->ops->listen(s_listen, 5);
719 if (err < 0)
720 goto out;
687 721
722 return 0;
688out: 723out:
689 if (s_listen) 724 if (s_listen)
690 sock_release(s_listen); 725 sock_release(s_listen);
691 if (err < 0) { 726 if (err < 0) {
692 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { 727 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
693 dev_err(DEV, "%s failed, err = %d\n", what, err); 728 conn_err(tconn, "%s failed, err = %d\n", what, err);
694 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 729 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
695 } 730 }
696 } 731 }
697 put_net_conf(mdev);
698 732
699 return s_estab; 733 return -EIO;
700} 734}
701 735
702static int drbd_send_fp(struct drbd_conf *mdev, 736static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
703 struct socket *sock, enum drbd_packets cmd)
704{ 737{
705 struct p_header80 *h = &mdev->data.sbuf.header.h80; 738 write_lock_bh(&sk->sk_callback_lock);
706 739 sk->sk_state_change = ad->original_sk_state_change;
707 return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); 740 sk->sk_user_data = NULL;
741 write_unlock_bh(&sk->sk_callback_lock);
708} 742}
709 743
710static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) 744static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct accept_wait_data *ad)
711{ 745{
712 struct p_header80 *h = &mdev->data.rbuf.header.h80; 746 int timeo, connect_int, err = 0;
713 int rr; 747 struct socket *s_estab = NULL;
748 struct net_conf *nc;
749
750 rcu_read_lock();
751 nc = rcu_dereference(tconn->net_conf);
752 if (!nc) {
753 rcu_read_unlock();
754 return NULL;
755 }
756 connect_int = nc->connect_int;
757 rcu_read_unlock();
758
759 timeo = connect_int * HZ;
760 timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
761
762 err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
763 if (err <= 0)
764 return NULL;
765
766 err = kernel_accept(ad->s_listen, &s_estab, 0);
767 if (err < 0) {
768 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
769 conn_err(tconn, "accept failed, err = %d\n", err);
770 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
771 }
772 }
773
774 if (s_estab)
775 unregister_state_change(s_estab->sk, ad);
776
777 return s_estab;
778}
714 779
715 rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); 780static int decode_header(struct drbd_tconn *, void *, struct packet_info *);
716 781
717 if (rr == sizeof(*h) && h->magic == BE_DRBD_MAGIC) 782static int send_first_packet(struct drbd_tconn *tconn, struct drbd_socket *sock,
718 return be16_to_cpu(h->command); 783 enum drbd_packet cmd)
784{
785 if (!conn_prepare_command(tconn, sock))
786 return -EIO;
787 return conn_send_command(tconn, sock, cmd, 0, NULL, 0);
788}
719 789
720 return 0xffff; 790static int receive_first_packet(struct drbd_tconn *tconn, struct socket *sock)
791{
792 unsigned int header_size = drbd_header_size(tconn);
793 struct packet_info pi;
794 int err;
795
796 err = drbd_recv_short(sock, tconn->data.rbuf, header_size, 0);
797 if (err != header_size) {
798 if (err >= 0)
799 err = -EIO;
800 return err;
801 }
802 err = decode_header(tconn, tconn->data.rbuf, &pi);
803 if (err)
804 return err;
805 return pi.cmd;
721} 806}
722 807
723/** 808/**
724 * drbd_socket_okay() - Free the socket if its connection is not okay 809 * drbd_socket_okay() - Free the socket if its connection is not okay
725 * @mdev: DRBD device.
726 * @sock: pointer to the pointer to the socket. 810 * @sock: pointer to the pointer to the socket.
727 */ 811 */
728static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock) 812static int drbd_socket_okay(struct socket **sock)
729{ 813{
730 int rr; 814 int rr;
731 char tb[4]; 815 char tb[4];
@@ -733,7 +817,7 @@ static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock)
733 if (!*sock) 817 if (!*sock)
734 return false; 818 return false;
735 819
736 rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); 820 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
737 821
738 if (rr > 0 || rr == -EAGAIN) { 822 if (rr > 0 || rr == -EAGAIN) {
739 return true; 823 return true;
@@ -743,6 +827,31 @@ static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock)
743 return false; 827 return false;
744 } 828 }
745} 829}
830/* Gets called if a connection is established, or if a new minor gets created
831 in a connection */
832int drbd_connected(struct drbd_conf *mdev)
833{
834 int err;
835
836 atomic_set(&mdev->packet_seq, 0);
837 mdev->peer_seq = 0;
838
839 mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
840 &mdev->tconn->cstate_mutex :
841 &mdev->own_state_mutex;
842
843 err = drbd_send_sync_param(mdev);
844 if (!err)
845 err = drbd_send_sizes(mdev, 0, 0);
846 if (!err)
847 err = drbd_send_uuids(mdev);
848 if (!err)
849 err = drbd_send_current_state(mdev);
850 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
851 clear_bit(RESIZE_PENDING, &mdev->flags);
852 mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
853 return err;
854}
746 855
747/* 856/*
748 * return values: 857 * return values:
@@ -752,232 +861,315 @@ static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock)
752 * no point in trying again, please go standalone. 861 * no point in trying again, please go standalone.
753 * -2 We do not have a network config... 862 * -2 We do not have a network config...
754 */ 863 */
755static int drbd_connect(struct drbd_conf *mdev) 864static int conn_connect(struct drbd_tconn *tconn)
756{ 865{
757 struct socket *s, *sock, *msock; 866 struct drbd_socket sock, msock;
758 int try, h, ok; 867 struct drbd_conf *mdev;
868 struct net_conf *nc;
869 int vnr, timeout, h, ok;
870 bool discard_my_data;
759 enum drbd_state_rv rv; 871 enum drbd_state_rv rv;
872 struct accept_wait_data ad = {
873 .tconn = tconn,
874 .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
875 };
760 876
761 D_ASSERT(!mdev->data.socket); 877 clear_bit(DISCONNECT_SENT, &tconn->flags);
762 878 if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
763 if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
764 return -2; 879 return -2;
765 880
766 clear_bit(DISCARD_CONCURRENT, &mdev->flags); 881 mutex_init(&sock.mutex);
882 sock.sbuf = tconn->data.sbuf;
883 sock.rbuf = tconn->data.rbuf;
884 sock.socket = NULL;
885 mutex_init(&msock.mutex);
886 msock.sbuf = tconn->meta.sbuf;
887 msock.rbuf = tconn->meta.rbuf;
888 msock.socket = NULL;
889
890 /* Assume that the peer only understands protocol 80 until we know better. */
891 tconn->agreed_pro_version = 80;
767 892
768 sock = NULL; 893 if (prepare_listen_socket(tconn, &ad))
769 msock = NULL; 894 return 0;
770 895
771 do { 896 do {
772 for (try = 0;;) { 897 struct socket *s;
773 /* 3 tries, this should take less than a second! */
774 s = drbd_try_connect(mdev);
775 if (s || ++try >= 3)
776 break;
777 /* give the other side time to call bind() & listen() */
778 schedule_timeout_interruptible(HZ / 10);
779 }
780 898
899 s = drbd_try_connect(tconn);
781 if (s) { 900 if (s) {
782 if (!sock) { 901 if (!sock.socket) {
783 drbd_send_fp(mdev, s, P_HAND_SHAKE_S); 902 sock.socket = s;
784 sock = s; 903 send_first_packet(tconn, &sock, P_INITIAL_DATA);
785 s = NULL; 904 } else if (!msock.socket) {
786 } else if (!msock) { 905 clear_bit(RESOLVE_CONFLICTS, &tconn->flags);
787 drbd_send_fp(mdev, s, P_HAND_SHAKE_M); 906 msock.socket = s;
788 msock = s; 907 send_first_packet(tconn, &msock, P_INITIAL_META);
789 s = NULL;
790 } else { 908 } else {
791 dev_err(DEV, "Logic error in drbd_connect()\n"); 909 conn_err(tconn, "Logic error in conn_connect()\n");
792 goto out_release_sockets; 910 goto out_release_sockets;
793 } 911 }
794 } 912 }
795 913
796 if (sock && msock) { 914 if (sock.socket && msock.socket) {
797 schedule_timeout_interruptible(mdev->net_conf->ping_timeo*HZ/10); 915 rcu_read_lock();
798 ok = drbd_socket_okay(mdev, &sock); 916 nc = rcu_dereference(tconn->net_conf);
799 ok = drbd_socket_okay(mdev, &msock) && ok; 917 timeout = nc->ping_timeo * HZ / 10;
918 rcu_read_unlock();
919 schedule_timeout_interruptible(timeout);
920 ok = drbd_socket_okay(&sock.socket);
921 ok = drbd_socket_okay(&msock.socket) && ok;
800 if (ok) 922 if (ok)
801 break; 923 break;
802 } 924 }
803 925
804retry: 926retry:
805 s = drbd_wait_for_connect(mdev); 927 s = drbd_wait_for_connect(tconn, &ad);
806 if (s) { 928 if (s) {
807 try = drbd_recv_fp(mdev, s); 929 int fp = receive_first_packet(tconn, s);
808 drbd_socket_okay(mdev, &sock); 930 drbd_socket_okay(&sock.socket);
809 drbd_socket_okay(mdev, &msock); 931 drbd_socket_okay(&msock.socket);
810 switch (try) { 932 switch (fp) {
811 case P_HAND_SHAKE_S: 933 case P_INITIAL_DATA:
812 if (sock) { 934 if (sock.socket) {
813 dev_warn(DEV, "initial packet S crossed\n"); 935 conn_warn(tconn, "initial packet S crossed\n");
814 sock_release(sock); 936 sock_release(sock.socket);
937 sock.socket = s;
938 goto randomize;
815 } 939 }
816 sock = s; 940 sock.socket = s;
817 break; 941 break;
818 case P_HAND_SHAKE_M: 942 case P_INITIAL_META:
819 if (msock) { 943 set_bit(RESOLVE_CONFLICTS, &tconn->flags);
820 dev_warn(DEV, "initial packet M crossed\n"); 944 if (msock.socket) {
821 sock_release(msock); 945 conn_warn(tconn, "initial packet M crossed\n");
946 sock_release(msock.socket);
947 msock.socket = s;
948 goto randomize;
822 } 949 }
823 msock = s; 950 msock.socket = s;
824 set_bit(DISCARD_CONCURRENT, &mdev->flags);
825 break; 951 break;
826 default: 952 default:
827 dev_warn(DEV, "Error receiving initial packet\n"); 953 conn_warn(tconn, "Error receiving initial packet\n");
828 sock_release(s); 954 sock_release(s);
955randomize:
829 if (random32() & 1) 956 if (random32() & 1)
830 goto retry; 957 goto retry;
831 } 958 }
832 } 959 }
833 960
834 if (mdev->state.conn <= C_DISCONNECTING) 961 if (tconn->cstate <= C_DISCONNECTING)
835 goto out_release_sockets; 962 goto out_release_sockets;
836 if (signal_pending(current)) { 963 if (signal_pending(current)) {
837 flush_signals(current); 964 flush_signals(current);
838 smp_rmb(); 965 smp_rmb();
839 if (get_t_state(&mdev->receiver) == Exiting) 966 if (get_t_state(&tconn->receiver) == EXITING)
840 goto out_release_sockets; 967 goto out_release_sockets;
841 } 968 }
842 969
843 if (sock && msock) { 970 ok = drbd_socket_okay(&sock.socket);
844 ok = drbd_socket_okay(mdev, &sock); 971 ok = drbd_socket_okay(&msock.socket) && ok;
845 ok = drbd_socket_okay(mdev, &msock) && ok; 972 } while (!ok);
846 if (ok) 973
847 break; 974 if (ad.s_listen)
848 } 975 sock_release(ad.s_listen);
849 } while (1);
850 976
851 msock->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */ 977 sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
852 sock->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */ 978 msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
853 979
854 sock->sk->sk_allocation = GFP_NOIO; 980 sock.socket->sk->sk_allocation = GFP_NOIO;
855 msock->sk->sk_allocation = GFP_NOIO; 981 msock.socket->sk->sk_allocation = GFP_NOIO;
856 982
857 sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; 983 sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
858 msock->sk->sk_priority = TC_PRIO_INTERACTIVE; 984 msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
859 985
860 /* NOT YET ... 986 /* NOT YET ...
861 * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; 987 * sock.socket->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
862 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 988 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
863 * first set it to the P_HAND_SHAKE timeout, 989 * first set it to the P_CONNECTION_FEATURES timeout,
864 * which we set to 4x the configured ping_timeout. */ 990 * which we set to 4x the configured ping_timeout. */
865 sock->sk->sk_sndtimeo = 991 rcu_read_lock();
866 sock->sk->sk_rcvtimeo = mdev->net_conf->ping_timeo*4*HZ/10; 992 nc = rcu_dereference(tconn->net_conf);
867 993
868 msock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; 994 sock.socket->sk->sk_sndtimeo =
869 msock->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; 995 sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
996
997 msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
998 timeout = nc->timeout * HZ / 10;
999 discard_my_data = nc->discard_my_data;
1000 rcu_read_unlock();
1001
1002 msock.socket->sk->sk_sndtimeo = timeout;
870 1003
871 /* we don't want delays. 1004 /* we don't want delays.
872 * we use TCP_CORK where appropriate, though */ 1005 * we use TCP_CORK where appropriate, though */
873 drbd_tcp_nodelay(sock); 1006 drbd_tcp_nodelay(sock.socket);
874 drbd_tcp_nodelay(msock); 1007 drbd_tcp_nodelay(msock.socket);
875
876 mdev->data.socket = sock;
877 mdev->meta.socket = msock;
878 mdev->last_received = jiffies;
879 1008
880 D_ASSERT(mdev->asender.task == NULL); 1009 tconn->data.socket = sock.socket;
1010 tconn->meta.socket = msock.socket;
1011 tconn->last_received = jiffies;
881 1012
882 h = drbd_do_handshake(mdev); 1013 h = drbd_do_features(tconn);
883 if (h <= 0) 1014 if (h <= 0)
884 return h; 1015 return h;
885 1016
886 if (mdev->cram_hmac_tfm) { 1017 if (tconn->cram_hmac_tfm) {
887 /* drbd_request_state(mdev, NS(conn, WFAuth)); */ 1018 /* drbd_request_state(mdev, NS(conn, WFAuth)); */
888 switch (drbd_do_auth(mdev)) { 1019 switch (drbd_do_auth(tconn)) {
889 case -1: 1020 case -1:
890 dev_err(DEV, "Authentication of peer failed\n"); 1021 conn_err(tconn, "Authentication of peer failed\n");
891 return -1; 1022 return -1;
892 case 0: 1023 case 0:
893 dev_err(DEV, "Authentication of peer failed, trying again.\n"); 1024 conn_err(tconn, "Authentication of peer failed, trying again.\n");
894 return 0; 1025 return 0;
895 } 1026 }
896 } 1027 }
897 1028
898 sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; 1029 tconn->data.socket->sk->sk_sndtimeo = timeout;
899 sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 1030 tconn->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
900 1031
901 atomic_set(&mdev->packet_seq, 0); 1032 if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
902 mdev->peer_seq = 0;
903
904 if (drbd_send_protocol(mdev) == -1)
905 return -1; 1033 return -1;
906 set_bit(STATE_SENT, &mdev->flags);
907 drbd_send_sync_param(mdev, &mdev->sync_conf);
908 drbd_send_sizes(mdev, 0, 0);
909 drbd_send_uuids(mdev);
910 drbd_send_current_state(mdev);
911 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
912 clear_bit(RESIZE_PENDING, &mdev->flags);
913 1034
914 spin_lock_irq(&mdev->req_lock); 1035 set_bit(STATE_SENT, &tconn->flags);
915 rv = _drbd_set_state(_NS(mdev, conn, C_WF_REPORT_PARAMS), CS_VERBOSE, NULL); 1036
916 if (mdev->state.conn != C_WF_REPORT_PARAMS) 1037 rcu_read_lock();
917 clear_bit(STATE_SENT, &mdev->flags); 1038 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
918 spin_unlock_irq(&mdev->req_lock); 1039 kref_get(&mdev->kref);
1040 /* Prevent a race between resync-handshake and
1041 * being promoted to Primary.
1042 *
1043 * Grab and release the state mutex, so we know that any current
1044 * drbd_set_role() is finished, and any incoming drbd_set_role
1045 * will see the STATE_SENT flag, and wait for it to be cleared.
1046 */
1047 mutex_lock(mdev->state_mutex);
1048 mutex_unlock(mdev->state_mutex);
1049
1050 rcu_read_unlock();
1051
1052 if (discard_my_data)
1053 set_bit(DISCARD_MY_DATA, &mdev->flags);
1054 else
1055 clear_bit(DISCARD_MY_DATA, &mdev->flags);
1056
1057 drbd_connected(mdev);
1058 kref_put(&mdev->kref, &drbd_minor_destroy);
1059 rcu_read_lock();
1060 }
1061 rcu_read_unlock();
919 1062
920 if (rv < SS_SUCCESS) 1063 rv = conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
1064 if (rv < SS_SUCCESS || tconn->cstate != C_WF_REPORT_PARAMS) {
1065 clear_bit(STATE_SENT, &tconn->flags);
921 return 0; 1066 return 0;
1067 }
922 1068
923 drbd_thread_start(&mdev->asender); 1069 drbd_thread_start(&tconn->asender);
924 mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
925 1070
926 return 1; 1071 mutex_lock(&tconn->conf_update);
1072 /* The discard_my_data flag is a single-shot modifier to the next
1073 * connection attempt, the handshake of which is now well underway.
1074 * No need for rcu style copying of the whole struct
1075 * just to clear a single value. */
1076 tconn->net_conf->discard_my_data = 0;
1077 mutex_unlock(&tconn->conf_update);
1078
1079 return h;
927 1080
928out_release_sockets: 1081out_release_sockets:
929 if (sock) 1082 if (ad.s_listen)
930 sock_release(sock); 1083 sock_release(ad.s_listen);
931 if (msock) 1084 if (sock.socket)
932 sock_release(msock); 1085 sock_release(sock.socket);
1086 if (msock.socket)
1087 sock_release(msock.socket);
933 return -1; 1088 return -1;
934} 1089}
935 1090
936static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size) 1091static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_info *pi)
937{ 1092{
938 union p_header *h = &mdev->data.rbuf.header; 1093 unsigned int header_size = drbd_header_size(tconn);
939 int r; 1094
940 1095 if (header_size == sizeof(struct p_header100) &&
941 r = drbd_recv(mdev, h, sizeof(*h)); 1096 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
942 if (unlikely(r != sizeof(*h))) { 1097 struct p_header100 *h = header;
943 if (!signal_pending(current)) 1098 if (h->pad != 0) {
944 dev_warn(DEV, "short read expecting header on sock: r=%d\n", r); 1099 conn_err(tconn, "Header padding is not zero\n");
945 return false; 1100 return -EINVAL;
946 } 1101 }
947 1102 pi->vnr = be16_to_cpu(h->volume);
948 if (likely(h->h80.magic == BE_DRBD_MAGIC)) { 1103 pi->cmd = be16_to_cpu(h->command);
949 *cmd = be16_to_cpu(h->h80.command); 1104 pi->size = be32_to_cpu(h->length);
950 *packet_size = be16_to_cpu(h->h80.length); 1105 } else if (header_size == sizeof(struct p_header95) &&
951 } else if (h->h95.magic == BE_DRBD_MAGIC_BIG) { 1106 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
952 *cmd = be16_to_cpu(h->h95.command); 1107 struct p_header95 *h = header;
953 *packet_size = be32_to_cpu(h->h95.length); 1108 pi->cmd = be16_to_cpu(h->command);
1109 pi->size = be32_to_cpu(h->length);
1110 pi->vnr = 0;
1111 } else if (header_size == sizeof(struct p_header80) &&
1112 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1113 struct p_header80 *h = header;
1114 pi->cmd = be16_to_cpu(h->command);
1115 pi->size = be16_to_cpu(h->length);
1116 pi->vnr = 0;
954 } else { 1117 } else {
955 dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n", 1118 conn_err(tconn, "Wrong magic value 0x%08x in protocol version %d\n",
956 be32_to_cpu(h->h80.magic), 1119 be32_to_cpu(*(__be32 *)header),
957 be16_to_cpu(h->h80.command), 1120 tconn->agreed_pro_version);
958 be16_to_cpu(h->h80.length)); 1121 return -EINVAL;
959 return false;
960 } 1122 }
961 mdev->last_received = jiffies; 1123 pi->data = header + header_size;
1124 return 0;
1125}
962 1126
963 return true; 1127static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
1128{
1129 void *buffer = tconn->data.rbuf;
1130 int err;
1131
1132 err = drbd_recv_all_warn(tconn, buffer, drbd_header_size(tconn));
1133 if (err)
1134 return err;
1135
1136 err = decode_header(tconn, buffer, pi);
1137 tconn->last_received = jiffies;
1138
1139 return err;
964} 1140}
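
[Illustration, not part of the patch] decode_header() above recognises three on-wire header formats by their size and magic value. As a rough sketch of the oldest one (protocol 80, selected by the 32-bit DRBD_MAGIC), assuming a layout that mirrors the fields decode_header() reads; the struct name, constant handling, and helper below are local to this sketch, not the kernel definitions:

        #include <linux/types.h>
        #include <asm/byteorder.h>

        struct hdr80_sketch {
                __be32 magic;     /* DRBD_MAGIC on the wire                  */
                __be16 command;   /* one of enum drbd_packet                 */
                __be16 length;    /* payload size following the header       */
        } __packed;

        /* Sketch: extract command and payload size from a protocol-80 header. */
        static void sketch_decode_hdr80(const void *buf, unsigned int *cmd,
                                        unsigned int *size)
        {
                const struct hdr80_sketch *h = buf;

                *cmd  = be16_to_cpu(h->command);
                *size = be16_to_cpu(h->length);
        }
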
965 1141
966static void drbd_flush(struct drbd_conf *mdev) 1142static void drbd_flush(struct drbd_tconn *tconn)
967{ 1143{
968 int rv; 1144 int rv;
1145 struct drbd_conf *mdev;
1146 int vnr;
969 1147
970 if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) { 1148 if (tconn->write_ordering >= WO_bdev_flush) {
971 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, 1149 rcu_read_lock();
972 NULL); 1150 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
973 if (rv) { 1151 if (!get_ldev(mdev))
974 dev_info(DEV, "local disk flush failed with status %d\n", rv); 1152 continue;
975 /* would rather check on EOPNOTSUPP, but that is not reliable. 1153 kref_get(&mdev->kref);
976 * don't try again for ANY return value != 0 1154 rcu_read_unlock();
977 * if (rv == -EOPNOTSUPP) */ 1155
978 drbd_bump_write_ordering(mdev, WO_drain_io); 1156 rv = blkdev_issue_flush(mdev->ldev->backing_bdev,
1157 GFP_NOIO, NULL);
1158 if (rv) {
1159 dev_info(DEV, "local disk flush failed with status %d\n", rv);
1160 /* would rather check on EOPNOTSUPP, but that is not reliable.
1161 * don't try again for ANY return value != 0
1162 * if (rv == -EOPNOTSUPP) */
1163 drbd_bump_write_ordering(tconn, WO_drain_io);
1164 }
1165 put_ldev(mdev);
1166 kref_put(&mdev->kref, &drbd_minor_destroy);
1167
1168 rcu_read_lock();
1169 if (rv)
1170 break;
979 } 1171 }
980 put_ldev(mdev); 1172 rcu_read_unlock();
981 } 1173 }
982} 1174}
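
[Illustration, not part of the patch] The new drbd_flush() above introduces an iteration idiom that recurs throughout this patch (see also conn_connect() and conn_wait_active_ee_empty()): walk all volumes of a connection under RCU, but take a kref and drop the RCU read lock before doing anything that may sleep. A distilled sketch, with do_blocking_work() as a hypothetical stand-in for the flush/wait calls and the drbd types coming from drbd_int.h:

        /* Sketch of the per-volume "pin, drop RCU, sleep, resume" pattern. */
        static void for_each_volume_may_sleep(struct drbd_tconn *tconn)
        {
                struct drbd_conf *mdev;
                int vnr;

                rcu_read_lock();
                idr_for_each_entry(&tconn->volumes, mdev, vnr) {
                        kref_get(&mdev->kref);          /* pin the device ...            */
                        rcu_read_unlock();              /* ... and leave the RCU section */

                        do_blocking_work(mdev);         /* hypothetical: may sleep       */

                        kref_put(&mdev->kref, &drbd_minor_destroy);
                        rcu_read_lock();                /* resume the idr walk           */
                }
                rcu_read_unlock();
        }
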
983 1175
@@ -987,7 +1179,7 @@ static void drbd_flush(struct drbd_conf *mdev)
987 * @epoch: Epoch object. 1179 * @epoch: Epoch object.
988 * @ev: Epoch event. 1180 * @ev: Epoch event.
989 */ 1181 */
990static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, 1182static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn,
991 struct drbd_epoch *epoch, 1183 struct drbd_epoch *epoch,
992 enum epoch_event ev) 1184 enum epoch_event ev)
993{ 1185{
@@ -995,7 +1187,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
995 struct drbd_epoch *next_epoch; 1187 struct drbd_epoch *next_epoch;
996 enum finish_epoch rv = FE_STILL_LIVE; 1188 enum finish_epoch rv = FE_STILL_LIVE;
997 1189
998 spin_lock(&mdev->epoch_lock); 1190 spin_lock(&tconn->epoch_lock);
999 do { 1191 do {
1000 next_epoch = NULL; 1192 next_epoch = NULL;
1001 1193
@@ -1017,18 +1209,22 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1017 atomic_read(&epoch->active) == 0 && 1209 atomic_read(&epoch->active) == 0 &&
1018 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) { 1210 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
1019 if (!(ev & EV_CLEANUP)) { 1211 if (!(ev & EV_CLEANUP)) {
1020 spin_unlock(&mdev->epoch_lock); 1212 spin_unlock(&tconn->epoch_lock);
1021 drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size); 1213 drbd_send_b_ack(epoch->tconn, epoch->barrier_nr, epoch_size);
1022 spin_lock(&mdev->epoch_lock); 1214 spin_lock(&tconn->epoch_lock);
1023 } 1215 }
1216#if 0
1217 /* FIXME: dec unacked on connection, once we have
1218 * something to count pending connection packets in. */
1024 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) 1219 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
1025 dec_unacked(mdev); 1220 dec_unacked(epoch->tconn);
1221#endif
1026 1222
1027 if (mdev->current_epoch != epoch) { 1223 if (tconn->current_epoch != epoch) {
1028 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); 1224 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1029 list_del(&epoch->list); 1225 list_del(&epoch->list);
1030 ev = EV_BECAME_LAST | (ev & EV_CLEANUP); 1226 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
1031 mdev->epochs--; 1227 tconn->epochs--;
1032 kfree(epoch); 1228 kfree(epoch);
1033 1229
1034 if (rv == FE_STILL_LIVE) 1230 if (rv == FE_STILL_LIVE)
@@ -1039,7 +1235,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1039 /* atomic_set(&epoch->active, 0); is already zero */ 1235 /* atomic_set(&epoch->active, 0); is already zero */
1040 if (rv == FE_STILL_LIVE) 1236 if (rv == FE_STILL_LIVE)
1041 rv = FE_RECYCLED; 1237 rv = FE_RECYCLED;
1042 wake_up(&mdev->ee_wait);
1043 } 1238 }
1044 } 1239 }
1045 1240
@@ -1049,40 +1244,52 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1049 epoch = next_epoch; 1244 epoch = next_epoch;
1050 } while (1); 1245 } while (1);
1051 1246
1052 spin_unlock(&mdev->epoch_lock); 1247 spin_unlock(&tconn->epoch_lock);
1053 1248
1054 return rv; 1249 return rv;
1055} 1250}
1056 1251
1057/** 1252/**
1058 * drbd_bump_write_ordering() - Fall back to an other write ordering method 1253 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1059 * @mdev: DRBD device. 1254 * @tconn: DRBD connection.
1060 * @wo: Write ordering method to try. 1255 * @wo: Write ordering method to try.
1061 */ 1256 */
1062void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local) 1257void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo)
1063{ 1258{
1259 struct disk_conf *dc;
1260 struct drbd_conf *mdev;
1064 enum write_ordering_e pwo; 1261 enum write_ordering_e pwo;
1262 int vnr;
1065 static char *write_ordering_str[] = { 1263 static char *write_ordering_str[] = {
1066 [WO_none] = "none", 1264 [WO_none] = "none",
1067 [WO_drain_io] = "drain", 1265 [WO_drain_io] = "drain",
1068 [WO_bdev_flush] = "flush", 1266 [WO_bdev_flush] = "flush",
1069 }; 1267 };
1070 1268
1071 pwo = mdev->write_ordering; 1269 pwo = tconn->write_ordering;
1072 wo = min(pwo, wo); 1270 wo = min(pwo, wo);
1073 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush) 1271 rcu_read_lock();
1074 wo = WO_drain_io; 1272 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1075 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain) 1273 if (!get_ldev_if_state(mdev, D_ATTACHING))
1076 wo = WO_none; 1274 continue;
1077 mdev->write_ordering = wo; 1275 dc = rcu_dereference(mdev->ldev->disk_conf);
1078 if (pwo != mdev->write_ordering || wo == WO_bdev_flush) 1276
1079 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]); 1277 if (wo == WO_bdev_flush && !dc->disk_flushes)
1278 wo = WO_drain_io;
1279 if (wo == WO_drain_io && !dc->disk_drain)
1280 wo = WO_none;
1281 put_ldev(mdev);
1282 }
1283 rcu_read_unlock();
1284 tconn->write_ordering = wo;
1285 if (pwo != tconn->write_ordering || wo == WO_bdev_flush)
1286 conn_info(tconn, "Method to ensure write ordering: %s\n", write_ordering_str[tconn->write_ordering]);
1080} 1287}
1081 1288
1082/** 1289/**
1083 * drbd_submit_ee() 1290 * drbd_submit_peer_request()
1084 * @mdev: DRBD device. 1291 * @mdev: DRBD device.
1085 * @e: epoch entry 1292 * @peer_req: peer request
1086 * @rw: flag field, see bio->bi_rw 1293 * @rw: flag field, see bio->bi_rw
1087 * 1294 *
1088 * May spread the pages to multiple bios, 1295 * May spread the pages to multiple bios,
@@ -1096,14 +1303,15 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo)
1096 * on certain Xen deployments. 1303 * on certain Xen deployments.
1097 */ 1304 */
1098/* TODO allocate from our own bio_set. */ 1305/* TODO allocate from our own bio_set. */
1099int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, 1306int drbd_submit_peer_request(struct drbd_conf *mdev,
1100 const unsigned rw, const int fault_type) 1307 struct drbd_peer_request *peer_req,
1308 const unsigned rw, const int fault_type)
1101{ 1309{
1102 struct bio *bios = NULL; 1310 struct bio *bios = NULL;
1103 struct bio *bio; 1311 struct bio *bio;
1104 struct page *page = e->pages; 1312 struct page *page = peer_req->pages;
1105 sector_t sector = e->sector; 1313 sector_t sector = peer_req->i.sector;
1106 unsigned ds = e->size; 1314 unsigned ds = peer_req->i.size;
1107 unsigned n_bios = 0; 1315 unsigned n_bios = 0;
1108 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; 1316 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
1109 int err = -ENOMEM; 1317 int err = -ENOMEM;
@@ -1122,12 +1330,12 @@ next_bio:
1122 dev_err(DEV, "submit_ee: Allocation of a bio failed\n"); 1330 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1123 goto fail; 1331 goto fail;
1124 } 1332 }
1125 /* > e->sector, unless this is the first bio */ 1333 /* > peer_req->i.sector, unless this is the first bio */
1126 bio->bi_sector = sector; 1334 bio->bi_sector = sector;
1127 bio->bi_bdev = mdev->ldev->backing_bdev; 1335 bio->bi_bdev = mdev->ldev->backing_bdev;
1128 bio->bi_rw = rw; 1336 bio->bi_rw = rw;
1129 bio->bi_private = e; 1337 bio->bi_private = peer_req;
1130 bio->bi_end_io = drbd_endio_sec; 1338 bio->bi_end_io = drbd_peer_request_endio;
1131 1339
1132 bio->bi_next = bios; 1340 bio->bi_next = bios;
1133 bios = bio; 1341 bios = bio;
@@ -1156,7 +1364,7 @@ next_bio:
1156 D_ASSERT(page == NULL); 1364 D_ASSERT(page == NULL);
1157 D_ASSERT(ds == 0); 1365 D_ASSERT(ds == 0);
1158 1366
1159 atomic_set(&e->pending_bios, n_bios); 1367 atomic_set(&peer_req->pending_bios, n_bios);
1160 do { 1368 do {
1161 bio = bios; 1369 bio = bios;
1162 bios = bios->bi_next; 1370 bios = bios->bi_next;
@@ -1175,26 +1383,57 @@ fail:
1175 return err; 1383 return err;
1176} 1384}
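
drbd_submit_peer_request() may spread a peer request's page chain over several bios when a bio cannot take another page. The sketch below mimics that greedy fill-until-full loop with a fixed page limit standing in for whatever bio_add_page() would accept; it only counts containers and is not the kernel logic itself:

#include <stdio.h>

#define PAGE_SIZE         4096u
#define MAX_PAGES_PER_BIO 4    /* illustrative limit; the real one comes from bio_add_page() */

/* Count how many "bios" are needed for ds bytes, filling each one greedily. */
static unsigned split_into_bios(unsigned ds)
{
	unsigned n_bios = 0;
	unsigned pages_in_bio = 0;

	while (ds) {
		unsigned len = ds < PAGE_SIZE ? ds : PAGE_SIZE;

		if (pages_in_bio == 0)
			n_bios++;               /* open a new bio */
		if (++pages_in_bio == MAX_PAGES_PER_BIO)
			pages_in_bio = 0;       /* bio is full, the next page opens a new one */
		ds -= len;
	}
	return n_bios;
}

int main(void)
{
	printf("%u\n", split_into_bios(9 * PAGE_SIZE + 100));   /* 10 pages -> 3 bios */
	return 0;
}
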
1177 1385
1178static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 1386static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
1387 struct drbd_peer_request *peer_req)
1388{
1389 struct drbd_interval *i = &peer_req->i;
1390
1391 drbd_remove_interval(&mdev->write_requests, i);
1392 drbd_clear_interval(i);
1393
1394 /* Wake up any processes waiting for this peer request to complete. */
1395 if (i->waiting)
1396 wake_up(&mdev->misc_wait);
1397}
1398
1399void conn_wait_active_ee_empty(struct drbd_tconn *tconn)
1400{
1401 struct drbd_conf *mdev;
1402 int vnr;
1403
1404 rcu_read_lock();
1405 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1406 kref_get(&mdev->kref);
1407 rcu_read_unlock();
1408 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
1409 kref_put(&mdev->kref, &drbd_minor_destroy);
1410 rcu_read_lock();
1411 }
1412 rcu_read_unlock();
1413}
1414
1415static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi)
1179{ 1416{
1180 int rv; 1417 int rv;
1181 struct p_barrier *p = &mdev->data.rbuf.barrier; 1418 struct p_barrier *p = pi->data;
1182 struct drbd_epoch *epoch; 1419 struct drbd_epoch *epoch;
1183 1420
1184 inc_unacked(mdev); 1421 /* FIXME these are unacked on connection,
1185 1422 * not a specific (peer)device.
1186 mdev->current_epoch->barrier_nr = p->barrier; 1423 */
1187 rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR); 1424 tconn->current_epoch->barrier_nr = p->barrier;
1425 tconn->current_epoch->tconn = tconn;
1426 rv = drbd_may_finish_epoch(tconn, tconn->current_epoch, EV_GOT_BARRIER_NR);
1188 1427
1189 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from 1428 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1190 * the activity log, which means it would not be resynced in case the 1429 * the activity log, which means it would not be resynced in case the
1191 * R_PRIMARY crashes now. 1430 * R_PRIMARY crashes now.
1192 * Therefore we must send the barrier_ack after the barrier request was 1431 * Therefore we must send the barrier_ack after the barrier request was
1193 * completed. */ 1432 * completed. */
1194 switch (mdev->write_ordering) { 1433 switch (tconn->write_ordering) {
1195 case WO_none: 1434 case WO_none:
1196 if (rv == FE_RECYCLED) 1435 if (rv == FE_RECYCLED)
1197 return true; 1436 return 0;
1198 1437
1199 /* receiver context, in the writeout path of the other node. 1438 /* receiver context, in the writeout path of the other node.
1200 * avoid potential distributed deadlock */ 1439 * avoid potential distributed deadlock */
@@ -1202,81 +1441,75 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign
1202 if (epoch) 1441 if (epoch)
1203 break; 1442 break;
1204 else 1443 else
1205 dev_warn(DEV, "Allocation of an epoch failed, slowing down\n"); 1444 conn_warn(tconn, "Allocation of an epoch failed, slowing down\n");
1206 /* Fall through */ 1445 /* Fall through */
1207 1446
1208 case WO_bdev_flush: 1447 case WO_bdev_flush:
1209 case WO_drain_io: 1448 case WO_drain_io:
1210 drbd_wait_ee_list_empty(mdev, &mdev->active_ee); 1449 conn_wait_active_ee_empty(tconn);
1211 drbd_flush(mdev); 1450 drbd_flush(tconn);
1212 1451
1213 if (atomic_read(&mdev->current_epoch->epoch_size)) { 1452 if (atomic_read(&tconn->current_epoch->epoch_size)) {
1214 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO); 1453 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1215 if (epoch) 1454 if (epoch)
1216 break; 1455 break;
1217 } 1456 }
1218 1457
1219 epoch = mdev->current_epoch; 1458 return 0;
1220 wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
1221
1222 D_ASSERT(atomic_read(&epoch->active) == 0);
1223 D_ASSERT(epoch->flags == 0);
1224
1225 return true;
1226 default: 1459 default:
1227 dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering); 1460 conn_err(tconn, "Strangeness in tconn->write_ordering %d\n", tconn->write_ordering);
1228 return false; 1461 return -EIO;
1229 } 1462 }
1230 1463
1231 epoch->flags = 0; 1464 epoch->flags = 0;
1232 atomic_set(&epoch->epoch_size, 0); 1465 atomic_set(&epoch->epoch_size, 0);
1233 atomic_set(&epoch->active, 0); 1466 atomic_set(&epoch->active, 0);
1234 1467
1235 spin_lock(&mdev->epoch_lock); 1468 spin_lock(&tconn->epoch_lock);
1236 if (atomic_read(&mdev->current_epoch->epoch_size)) { 1469 if (atomic_read(&tconn->current_epoch->epoch_size)) {
1237 list_add(&epoch->list, &mdev->current_epoch->list); 1470 list_add(&epoch->list, &tconn->current_epoch->list);
1238 mdev->current_epoch = epoch; 1471 tconn->current_epoch = epoch;
1239 mdev->epochs++; 1472 tconn->epochs++;
1240 } else { 1473 } else {
1241 /* The current_epoch got recycled while we allocated this one... */ 1474 /* The current_epoch got recycled while we allocated this one... */
1242 kfree(epoch); 1475 kfree(epoch);
1243 } 1476 }
1244 spin_unlock(&mdev->epoch_lock); 1477 spin_unlock(&tconn->epoch_lock);
1245 1478
1246 return true; 1479 return 0;
1247} 1480}
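
receive_Barrier() allocates the next epoch outside tconn->epoch_lock and only decides under the lock whether to chain it in or throw it away because the current epoch emptied (was recycled) in the meantime. A simplified userspace analog of that pattern, using a mutex instead of the spinlock and a single prev pointer instead of the kernel list:

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

struct epoch {
	struct epoch *prev;      /* simplified stand-in for the kernel list linkage */
	int epoch_size;          /* writes accounted to this epoch and not yet finished */
};

static pthread_mutex_t epoch_lock = PTHREAD_MUTEX_INITIALIZER;
static struct epoch *current_epoch;

/* Start a new epoch after a barrier: allocate outside the lock, then under the
 * lock either chain the new epoch in (the current one still holds writes) or
 * discard it because the current epoch was recycled in the meantime. */
static int start_new_epoch(void)
{
	struct epoch *epoch = calloc(1, sizeof(*epoch));

	if (!epoch)
		return -1;

	pthread_mutex_lock(&epoch_lock);
	if (current_epoch->epoch_size) {
		epoch->prev = current_epoch;
		current_epoch = epoch;
	} else {
		free(epoch);            /* current epoch is empty again: reuse it */
	}
	pthread_mutex_unlock(&epoch_lock);
	return 0;
}

int main(void)
{
	struct epoch first = { .prev = NULL, .epoch_size = 3 };

	current_epoch = &first;
	start_new_epoch();
	printf("%s\n", current_epoch == &first ? "recycled" : "new epoch");
	return 0;
}
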
1248 1481
1249/* used from receive_RSDataReply (recv_resync_read) 1482/* used from receive_RSDataReply (recv_resync_read)
1250 * and from receive_Data */ 1483 * and from receive_Data */
1251static struct drbd_epoch_entry * 1484static struct drbd_peer_request *
1252read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local) 1485read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1486 int data_size) __must_hold(local)
1253{ 1487{
1254 const sector_t capacity = drbd_get_capacity(mdev->this_bdev); 1488 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
1255 struct drbd_epoch_entry *e; 1489 struct drbd_peer_request *peer_req;
1256 struct page *page; 1490 struct page *page;
1257 int dgs, ds, rr; 1491 int dgs, ds, err;
1258 void *dig_in = mdev->int_dig_in; 1492 void *dig_in = mdev->tconn->int_dig_in;
1259 void *dig_vv = mdev->int_dig_vv; 1493 void *dig_vv = mdev->tconn->int_dig_vv;
1260 unsigned long *data; 1494 unsigned long *data;
1261 1495
1262 dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? 1496 dgs = 0;
1263 crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; 1497 if (mdev->tconn->peer_integrity_tfm) {
1264 1498 dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
1265 if (dgs) { 1499 /*
1266 rr = drbd_recv(mdev, dig_in, dgs); 1500 * FIXME: Receive the incoming digest into the receive buffer
1267 if (rr != dgs) { 1501 * here, together with its struct p_data?
1268 if (!signal_pending(current)) 1502 */
1269 dev_warn(DEV, 1503 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1270 "short read receiving data digest: read %d expected %d\n", 1504 if (err)
1271 rr, dgs);
1272 return NULL; 1505 return NULL;
1273 } 1506 data_size -= dgs;
1274 } 1507 }
1275 1508
1276 data_size -= dgs; 1509 if (!expect(IS_ALIGNED(data_size, 512)))
1277 1510 return NULL;
1278 ERR_IF(data_size & 0x1ff) return NULL; 1511 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1279 ERR_IF(data_size > DRBD_MAX_BIO_SIZE) return NULL; 1512 return NULL;
1280 1513
1281 /* even though we trust our peer, 1514 /* even though we trust our peer,
1282 * we sometimes have to double check. */ 1515 * we sometimes have to double check. */
@@ -1291,47 +1524,42 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __
1291 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD 1524 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1292 * "criss-cross" setup, that might cause write-out on some other DRBD, 1525 * "criss-cross" setup, that might cause write-out on some other DRBD,
1293 * which in turn might block on the other node at this very place. */ 1526 * which in turn might block on the other node at this very place. */
1294 e = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO); 1527 peer_req = drbd_alloc_peer_req(mdev, id, sector, data_size, GFP_NOIO);
1295 if (!e) 1528 if (!peer_req)
1296 return NULL; 1529 return NULL;
1297 1530
1298 if (!data_size) 1531 if (!data_size)
1299 return e; 1532 return peer_req;
1300 1533
1301 ds = data_size; 1534 ds = data_size;
1302 page = e->pages; 1535 page = peer_req->pages;
1303 page_chain_for_each(page) { 1536 page_chain_for_each(page) {
1304 unsigned len = min_t(int, ds, PAGE_SIZE); 1537 unsigned len = min_t(int, ds, PAGE_SIZE);
1305 data = kmap(page); 1538 data = kmap(page);
1306 rr = drbd_recv(mdev, data, len); 1539 err = drbd_recv_all_warn(mdev->tconn, data, len);
1307 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) { 1540 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
1308 dev_err(DEV, "Fault injection: Corrupting data on receive\n"); 1541 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1309 data[0] = data[0] ^ (unsigned long)-1; 1542 data[0] = data[0] ^ (unsigned long)-1;
1310 } 1543 }
1311 kunmap(page); 1544 kunmap(page);
1312 if (rr != len) { 1545 if (err) {
1313 drbd_free_ee(mdev, e); 1546 drbd_free_peer_req(mdev, peer_req);
1314 if (!signal_pending(current))
1315 dev_warn(DEV, "short read receiving data: read %d expected %d\n",
1316 rr, len);
1317 return NULL; 1547 return NULL;
1318 } 1548 }
1319 ds -= rr; 1549 ds -= len;
1320 } 1550 }
1321 1551
1322 if (dgs) { 1552 if (dgs) {
1323 drbd_csum_ee(mdev, mdev->integrity_r_tfm, e, dig_vv); 1553 drbd_csum_ee(mdev, mdev->tconn->peer_integrity_tfm, peer_req, dig_vv);
1324 if (memcmp(dig_in, dig_vv, dgs)) { 1554 if (memcmp(dig_in, dig_vv, dgs)) {
1325 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n", 1555 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1326 (unsigned long long)sector, data_size); 1556 (unsigned long long)sector, data_size);
1327 drbd_bcast_ee(mdev, "digest failed", 1557 drbd_free_peer_req(mdev, peer_req);
1328 dgs, dig_in, dig_vv, e);
1329 drbd_free_ee(mdev, e);
1330 return NULL; 1558 return NULL;
1331 } 1559 }
1332 } 1560 }
1333 mdev->recv_cnt += data_size>>9; 1561 mdev->recv_cnt += data_size>>9;
1334 return e; 1562 return peer_req;
1335} 1563}
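
read_in_block() first pulls in the optional peer data digest, then receives the payload page by page, recomputes the digest over the peer request and frees it on a mismatch. The sketch below shows the same receive-then-verify shape; the additive toy_digest() is only a placeholder for the crypto_hash computation done with peer_integrity_tfm:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define CHUNK 4096

/* Toy digest: sum of all bytes. Only a placeholder for crypto_hash_*(). */
static uint32_t toy_digest(const unsigned char *buf, size_t len)
{
	uint32_t sum = 0;
	while (len--)
		sum += *buf++;
	return sum;
}

/* Verify a payload against the digest that travelled in front of it.
 * Returns 0 on success, -1 on digest mismatch. */
static int verify_payload(const unsigned char *payload, size_t size, uint32_t dig_in)
{
	uint32_t dig_vv = 0;

	for (size_t off = 0; off < size; off += CHUNK) {
		size_t len = size - off < CHUNK ? size - off : CHUNK;
		/* the kernel code receives each page from the socket here */
		dig_vv += toy_digest(payload + off, len);
	}
	return dig_vv == dig_in ? 0 : -1;
}

int main(void)
{
	unsigned char data[10000];
	memset(data, 0xab, sizeof(data));
	printf("%d\n", verify_payload(data, sizeof(data), toy_digest(data, sizeof(data))));
	return 0;
}
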
1336 1564
1337/* drbd_drain_block() just takes a data block 1565/* drbd_drain_block() just takes a data block
@@ -1340,30 +1568,26 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __
1340static int drbd_drain_block(struct drbd_conf *mdev, int data_size) 1568static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1341{ 1569{
1342 struct page *page; 1570 struct page *page;
1343 int rr, rv = 1; 1571 int err = 0;
1344 void *data; 1572 void *data;
1345 1573
1346 if (!data_size) 1574 if (!data_size)
1347 return true; 1575 return 0;
1348 1576
1349 page = drbd_pp_alloc(mdev, 1, 1); 1577 page = drbd_alloc_pages(mdev, 1, 1);
1350 1578
1351 data = kmap(page); 1579 data = kmap(page);
1352 while (data_size) { 1580 while (data_size) {
1353 rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE)); 1581 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1354 if (rr != min_t(int, data_size, PAGE_SIZE)) { 1582
1355 rv = 0; 1583 err = drbd_recv_all_warn(mdev->tconn, data, len);
1356 if (!signal_pending(current)) 1584 if (err)
1357 dev_warn(DEV,
1358 "short read receiving data: read %d expected %d\n",
1359 rr, min_t(int, data_size, PAGE_SIZE));
1360 break; 1585 break;
1361 } 1586 data_size -= len;
1362 data_size -= rr;
1363 } 1587 }
1364 kunmap(page); 1588 kunmap(page);
1365 drbd_pp_free(mdev, page, 0); 1589 drbd_free_pages(mdev, page, 0);
1366 return rv; 1590 return err;
1367} 1591}
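
drbd_drain_block() consumes an unwanted payload through one scratch page, PAGE_SIZE bytes at a time, and now reports 0 or a negative error instead of a bool. Roughly the same loop against a plain file descriptor, with read() standing in for drbd_recv_all_warn():

#include <unistd.h>
#include <errno.h>

#define PAGE_SIZE 4096

/* Read and discard data_size bytes from fd using one scratch buffer.
 * Returns 0 on success or a negative errno-style value on failure. */
static int drain_block(int fd, int data_size)
{
	char page[PAGE_SIZE];

	while (data_size) {
		int len = data_size < (int)PAGE_SIZE ? data_size : (int)PAGE_SIZE;
		ssize_t rr = read(fd, page, len);

		if (rr <= 0)
			return rr == 0 ? -ECONNRESET : -errno;
		data_size -= (int)rr;
	}
	return 0;
}

int main(void)
{
	return drain_block(0, 0) ? 1 : 0;   /* nothing to drain from stdin */
}
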
1368 1592
1369static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, 1593static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
@@ -1371,26 +1595,19 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1371{ 1595{
1372 struct bio_vec *bvec; 1596 struct bio_vec *bvec;
1373 struct bio *bio; 1597 struct bio *bio;
1374 int dgs, rr, i, expect; 1598 int dgs, err, i, expect;
1375 void *dig_in = mdev->int_dig_in; 1599 void *dig_in = mdev->tconn->int_dig_in;
1376 void *dig_vv = mdev->int_dig_vv; 1600 void *dig_vv = mdev->tconn->int_dig_vv;
1377
1378 dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
1379 crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
1380 1601
1381 if (dgs) { 1602 dgs = 0;
1382 rr = drbd_recv(mdev, dig_in, dgs); 1603 if (mdev->tconn->peer_integrity_tfm) {
1383 if (rr != dgs) { 1604 dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
1384 if (!signal_pending(current)) 1605 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1385 dev_warn(DEV, 1606 if (err)
1386 "short read receiving data reply digest: read %d expected %d\n", 1607 return err;
1387 rr, dgs); 1608 data_size -= dgs;
1388 return 0;
1389 }
1390 } 1609 }
1391 1610
1392 data_size -= dgs;
1393
1394 /* optimistically update recv_cnt. if receiving fails below, 1611 /* optimistically update recv_cnt. if receiving fails below,
1395 * we disconnect anyways, and counters will be reset. */ 1612 * we disconnect anyways, and counters will be reset. */
1396 mdev->recv_cnt += data_size>>9; 1613 mdev->recv_cnt += data_size>>9;
@@ -1399,63 +1616,61 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1399 D_ASSERT(sector == bio->bi_sector); 1616 D_ASSERT(sector == bio->bi_sector);
1400 1617
1401 bio_for_each_segment(bvec, bio, i) { 1618 bio_for_each_segment(bvec, bio, i) {
1619 void *mapped = kmap(bvec->bv_page) + bvec->bv_offset;
1402 expect = min_t(int, data_size, bvec->bv_len); 1620 expect = min_t(int, data_size, bvec->bv_len);
1403 rr = drbd_recv(mdev, 1621 err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
1404 kmap(bvec->bv_page)+bvec->bv_offset,
1405 expect);
1406 kunmap(bvec->bv_page); 1622 kunmap(bvec->bv_page);
1407 if (rr != expect) { 1623 if (err)
1408 if (!signal_pending(current)) 1624 return err;
1409 dev_warn(DEV, "short read receiving data reply: " 1625 data_size -= expect;
1410 "read %d expected %d\n",
1411 rr, expect);
1412 return 0;
1413 }
1414 data_size -= rr;
1415 } 1626 }
1416 1627
1417 if (dgs) { 1628 if (dgs) {
1418 drbd_csum_bio(mdev, mdev->integrity_r_tfm, bio, dig_vv); 1629 drbd_csum_bio(mdev, mdev->tconn->peer_integrity_tfm, bio, dig_vv);
1419 if (memcmp(dig_in, dig_vv, dgs)) { 1630 if (memcmp(dig_in, dig_vv, dgs)) {
1420 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n"); 1631 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
1421 return 0; 1632 return -EINVAL;
1422 } 1633 }
1423 } 1634 }
1424 1635
1425 D_ASSERT(data_size == 0); 1636 D_ASSERT(data_size == 0);
1426 return 1; 1637 return 0;
1427} 1638}
1428 1639
1429/* e_end_resync_block() is called via 1640/*
1430 * drbd_process_done_ee() by asender only */ 1641 * e_end_resync_block() is called in asender context via
1431static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) 1642 * drbd_finish_peer_reqs().
1643 */
1644static int e_end_resync_block(struct drbd_work *w, int unused)
1432{ 1645{
1433 struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; 1646 struct drbd_peer_request *peer_req =
1434 sector_t sector = e->sector; 1647 container_of(w, struct drbd_peer_request, w);
1435 int ok; 1648 struct drbd_conf *mdev = w->mdev;
1649 sector_t sector = peer_req->i.sector;
1650 int err;
1436 1651
1437 D_ASSERT(hlist_unhashed(&e->collision)); 1652 D_ASSERT(drbd_interval_empty(&peer_req->i));
1438 1653
1439 if (likely((e->flags & EE_WAS_ERROR) == 0)) { 1654 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1440 drbd_set_in_sync(mdev, sector, e->size); 1655 drbd_set_in_sync(mdev, sector, peer_req->i.size);
1441 ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, e); 1656 err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
1442 } else { 1657 } else {
1443 /* Record failure to sync */ 1658 /* Record failure to sync */
1444 drbd_rs_failed_io(mdev, sector, e->size); 1659 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
1445 1660
1446 ok = drbd_send_ack(mdev, P_NEG_ACK, e); 1661 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
1447 } 1662 }
1448 dec_unacked(mdev); 1663 dec_unacked(mdev);
1449 1664
1450 return ok; 1665 return err;
1451} 1666}
1452 1667
1453static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local) 1668static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1454{ 1669{
1455 struct drbd_epoch_entry *e; 1670 struct drbd_peer_request *peer_req;
1456 1671
1457 e = read_in_block(mdev, ID_SYNCER, sector, data_size); 1672 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1458 if (!e) 1673 if (!peer_req)
1459 goto fail; 1674 goto fail;
1460 1675
1461 dec_rs_pending(mdev); 1676 dec_rs_pending(mdev);
@@ -1464,64 +1679,88 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si
1464 /* corresponding dec_unacked() in e_end_resync_block() 1679 /* corresponding dec_unacked() in e_end_resync_block()
1465 * respective _drbd_clear_done_ee */ 1680 * respective _drbd_clear_done_ee */
1466 1681
1467 e->w.cb = e_end_resync_block; 1682 peer_req->w.cb = e_end_resync_block;
1468 1683
1469 spin_lock_irq(&mdev->req_lock); 1684 spin_lock_irq(&mdev->tconn->req_lock);
1470 list_add(&e->w.list, &mdev->sync_ee); 1685 list_add(&peer_req->w.list, &mdev->sync_ee);
1471 spin_unlock_irq(&mdev->req_lock); 1686 spin_unlock_irq(&mdev->tconn->req_lock);
1472 1687
1473 atomic_add(data_size >> 9, &mdev->rs_sect_ev); 1688 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
1474 if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) 1689 if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
1475 return true; 1690 return 0;
1476 1691
1477 /* don't care for the reason here */ 1692 /* don't care for the reason here */
1478 dev_err(DEV, "submit failed, triggering re-connect\n"); 1693 dev_err(DEV, "submit failed, triggering re-connect\n");
1479 spin_lock_irq(&mdev->req_lock); 1694 spin_lock_irq(&mdev->tconn->req_lock);
1480 list_del(&e->w.list); 1695 list_del(&peer_req->w.list);
1481 spin_unlock_irq(&mdev->req_lock); 1696 spin_unlock_irq(&mdev->tconn->req_lock);
1482 1697
1483 drbd_free_ee(mdev, e); 1698 drbd_free_peer_req(mdev, peer_req);
1484fail: 1699fail:
1485 put_ldev(mdev); 1700 put_ldev(mdev);
1486 return false; 1701 return -EIO;
1487} 1702}
1488 1703
1489static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 1704static struct drbd_request *
1705find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1706 sector_t sector, bool missing_ok, const char *func)
1490{ 1707{
1491 struct drbd_request *req; 1708 struct drbd_request *req;
1709
1710 /* Request object according to our peer */
1711 req = (struct drbd_request *)(unsigned long)id;
1712 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
1713 return req;
1714 if (!missing_ok) {
1715 dev_err(DEV, "%s: failed to find request 0x%lx, sector %llus\n", func,
1716 (unsigned long)id, (unsigned long long)sector);
1717 }
1718 return NULL;
1719}
1720
1721static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi)
1722{
1723 struct drbd_conf *mdev;
1724 struct drbd_request *req;
1492 sector_t sector; 1725 sector_t sector;
1493 int ok; 1726 int err;
1494 struct p_data *p = &mdev->data.rbuf.data; 1727 struct p_data *p = pi->data;
1728
1729 mdev = vnr_to_mdev(tconn, pi->vnr);
1730 if (!mdev)
1731 return -EIO;
1495 1732
1496 sector = be64_to_cpu(p->sector); 1733 sector = be64_to_cpu(p->sector);
1497 1734
1498 spin_lock_irq(&mdev->req_lock); 1735 spin_lock_irq(&mdev->tconn->req_lock);
1499 req = _ar_id_to_req(mdev, p->block_id, sector); 1736 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
1500 spin_unlock_irq(&mdev->req_lock); 1737 spin_unlock_irq(&mdev->tconn->req_lock);
1501 if (unlikely(!req)) { 1738 if (unlikely(!req))
1502 dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n"); 1739 return -EIO;
1503 return false;
1504 }
1505 1740
1506 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid 1741 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
1507 * special casing it there for the various failure cases. 1742 * special casing it there for the various failure cases.
1508 * still no race with drbd_fail_pending_reads */ 1743 * still no race with drbd_fail_pending_reads */
1509 ok = recv_dless_read(mdev, req, sector, data_size); 1744 err = recv_dless_read(mdev, req, sector, pi->size);
1510 1745 if (!err)
1511 if (ok) 1746 req_mod(req, DATA_RECEIVED);
1512 req_mod(req, data_received);
1513 /* else: nothing. handled from drbd_disconnect... 1747 /* else: nothing. handled from drbd_disconnect...
1514 * I don't think we may complete this just yet 1748 * I don't think we may complete this just yet
1515 * in case we are "on-disconnect: freeze" */ 1749 * in case we are "on-disconnect: freeze" */
1516 1750
1517 return ok; 1751 return err;
1518} 1752}
1519 1753
1520static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 1754static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi)
1521{ 1755{
1756 struct drbd_conf *mdev;
1522 sector_t sector; 1757 sector_t sector;
1523 int ok; 1758 int err;
1524 struct p_data *p = &mdev->data.rbuf.data; 1759 struct p_data *p = pi->data;
1760
1761 mdev = vnr_to_mdev(tconn, pi->vnr);
1762 if (!mdev)
1763 return -EIO;
1525 1764
1526 sector = be64_to_cpu(p->sector); 1765 sector = be64_to_cpu(p->sector);
1527 D_ASSERT(p->block_id == ID_SYNCER); 1766 D_ASSERT(p->block_id == ID_SYNCER);
@@ -1529,42 +1768,63 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, un
1529 if (get_ldev(mdev)) { 1768 if (get_ldev(mdev)) {
1530 /* data is submitted to disk within recv_resync_read. 1769 /* data is submitted to disk within recv_resync_read.
1531 * corresponding put_ldev done below on error, 1770 * corresponding put_ldev done below on error,
1532 * or in drbd_endio_write_sec. */ 1771 * or in drbd_peer_request_endio. */
1533 ok = recv_resync_read(mdev, sector, data_size); 1772 err = recv_resync_read(mdev, sector, pi->size);
1534 } else { 1773 } else {
1535 if (__ratelimit(&drbd_ratelimit_state)) 1774 if (__ratelimit(&drbd_ratelimit_state))
1536 dev_err(DEV, "Can not write resync data to local disk.\n"); 1775 dev_err(DEV, "Can not write resync data to local disk.\n");
1537 1776
1538 ok = drbd_drain_block(mdev, data_size); 1777 err = drbd_drain_block(mdev, pi->size);
1539 1778
1540 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size); 1779 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
1541 } 1780 }
1542 1781
1543 atomic_add(data_size >> 9, &mdev->rs_sect_in); 1782 atomic_add(pi->size >> 9, &mdev->rs_sect_in);
1544 1783
1545 return ok; 1784 return err;
1546} 1785}
1547 1786
1548/* e_end_block() is called via drbd_process_done_ee(). 1787static void restart_conflicting_writes(struct drbd_conf *mdev,
1549 * this means this function only runs in the asender thread 1788 sector_t sector, int size)
1550 */
1551static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1552{ 1789{
1553 struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; 1790 struct drbd_interval *i;
1554 sector_t sector = e->sector; 1791 struct drbd_request *req;
1555 int ok = 1, pcmd;
1556 1792
1557 if (mdev->net_conf->wire_protocol == DRBD_PROT_C) { 1793 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1558 if (likely((e->flags & EE_WAS_ERROR) == 0)) { 1794 if (!i->local)
1795 continue;
1796 req = container_of(i, struct drbd_request, i);
1797 if (req->rq_state & RQ_LOCAL_PENDING ||
1798 !(req->rq_state & RQ_POSTPONED))
1799 continue;
1800 /* as it is RQ_POSTPONED, this will cause it to
1801 * be queued on the retry workqueue. */
1802 __req_mod(req, CONFLICT_RESOLVED, NULL);
1803 }
1804}
1805
1806/*
1807 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
1808 */
1809static int e_end_block(struct drbd_work *w, int cancel)
1810{
1811 struct drbd_peer_request *peer_req =
1812 container_of(w, struct drbd_peer_request, w);
1813 struct drbd_conf *mdev = w->mdev;
1814 sector_t sector = peer_req->i.sector;
1815 int err = 0, pcmd;
1816
1817 if (peer_req->flags & EE_SEND_WRITE_ACK) {
1818 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1559 pcmd = (mdev->state.conn >= C_SYNC_SOURCE && 1819 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1560 mdev->state.conn <= C_PAUSED_SYNC_T && 1820 mdev->state.conn <= C_PAUSED_SYNC_T &&
1561 e->flags & EE_MAY_SET_IN_SYNC) ? 1821 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
1562 P_RS_WRITE_ACK : P_WRITE_ACK; 1822 P_RS_WRITE_ACK : P_WRITE_ACK;
1563 ok &= drbd_send_ack(mdev, pcmd, e); 1823 err = drbd_send_ack(mdev, pcmd, peer_req);
1564 if (pcmd == P_RS_WRITE_ACK) 1824 if (pcmd == P_RS_WRITE_ACK)
1565 drbd_set_in_sync(mdev, sector, e->size); 1825 drbd_set_in_sync(mdev, sector, peer_req->i.size);
1566 } else { 1826 } else {
1567 ok = drbd_send_ack(mdev, P_NEG_ACK, e); 1827 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
1568 /* we expect it to be marked out of sync anyways... 1828 /* we expect it to be marked out of sync anyways...
1569 * maybe assert this? */ 1829 * maybe assert this? */
1570 } 1830 }
@@ -1572,52 +1832,115 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1572 } 1832 }
1573 /* we delete from the conflict detection hash _after_ we sent out the 1833 /* we delete from the conflict detection hash _after_ we sent out the
1574 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ 1834 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
1575 if (mdev->net_conf->two_primaries) { 1835 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
1576 spin_lock_irq(&mdev->req_lock); 1836 spin_lock_irq(&mdev->tconn->req_lock);
1577 D_ASSERT(!hlist_unhashed(&e->collision)); 1837 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1578 hlist_del_init(&e->collision); 1838 drbd_remove_epoch_entry_interval(mdev, peer_req);
1579 spin_unlock_irq(&mdev->req_lock); 1839 if (peer_req->flags & EE_RESTART_REQUESTS)
1580 } else { 1840 restart_conflicting_writes(mdev, sector, peer_req->i.size);
1581 D_ASSERT(hlist_unhashed(&e->collision)); 1841 spin_unlock_irq(&mdev->tconn->req_lock);
1582 } 1842 } else
1843 D_ASSERT(drbd_interval_empty(&peer_req->i));
1583 1844
1584 drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); 1845 drbd_may_finish_epoch(mdev->tconn, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
1585 1846
1586 return ok; 1847 return err;
1587} 1848}
1588 1849
1589static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused) 1850static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
1590{ 1851{
1591 struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; 1852 struct drbd_conf *mdev = w->mdev;
1592 int ok = 1; 1853 struct drbd_peer_request *peer_req =
1854 container_of(w, struct drbd_peer_request, w);
1855 int err;
1593 1856
1594 D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); 1857 err = drbd_send_ack(mdev, ack, peer_req);
1595 ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); 1858 dec_unacked(mdev);
1596 1859
1597 spin_lock_irq(&mdev->req_lock); 1860 return err;
1598 D_ASSERT(!hlist_unhashed(&e->collision)); 1861}
1599 hlist_del_init(&e->collision);
1600 spin_unlock_irq(&mdev->req_lock);
1601 1862
1602 dec_unacked(mdev); 1863static int e_send_superseded(struct drbd_work *w, int unused)
1864{
1865 return e_send_ack(w, P_SUPERSEDED);
1866}
1867
1868static int e_send_retry_write(struct drbd_work *w, int unused)
1869{
1870 struct drbd_tconn *tconn = w->mdev->tconn;
1871
1872 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1873 P_RETRY_WRITE : P_SUPERSEDED);
1874}
1875
1876static bool seq_greater(u32 a, u32 b)
1877{
1878 /*
1879 * We assume 32-bit wrap-around here.
1880 * For 24-bit wrap-around, we would have to shift:
1881 * a <<= 8; b <<= 8;
1882 */
1883 return (s32)a - (s32)b > 0;
1884}
1885
1886static u32 seq_max(u32 a, u32 b)
1887{
1888 return seq_greater(a, b) ? a : b;
1889}
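
seq_greater()/seq_max() compare 32-bit packet sequence numbers under the stated assumption of 32-bit wrap-around, which is why the test is done on the signed difference rather than on the raw values. A quick standalone check of that property (written with an unsigned subtraction to avoid signed overflow, equivalent in effect to the patch's (s32)a - (s32)b):

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

/* "a is newer than b", tolerant of 32-bit wrap-around. */
static int seq_greater(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

int main(void)
{
	/* ordinary case */
	assert(seq_greater(11, 10));
	/* wrap-around: 3 was assigned after 0xfffffffe even though it is numerically smaller */
	assert(seq_greater(3, 0xfffffffeu));
	assert(!seq_greater(0xfffffffeu, 3));
	printf("ok\n");
	return 0;
}
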
1890
1891static bool need_peer_seq(struct drbd_conf *mdev)
1892{
1893 struct drbd_tconn *tconn = mdev->tconn;
1894 int tp;
1603 1895
1604 return ok; 1896 /*
1897 * We only need to keep track of the last packet_seq number of our peer
1898 * if we are in dual-primary mode and we have the resolve-conflicts flag set; see
1899 * handle_write_conflicts().
1900 */
1901
1902 rcu_read_lock();
1903 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
1904 rcu_read_unlock();
1905
1906 return tp && test_bit(RESOLVE_CONFLICTS, &tconn->flags);
1605} 1907}
1606 1908
1607static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_epoch_entry *data_e) 1909static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
1608{ 1910{
1911 unsigned int newest_peer_seq;
1609 1912
1610 struct drbd_epoch_entry *rs_e; 1913 if (need_peer_seq(mdev)) {
1914 spin_lock(&mdev->peer_seq_lock);
1915 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
1916 mdev->peer_seq = newest_peer_seq;
1917 spin_unlock(&mdev->peer_seq_lock);
1918 /* wake up only if we actually changed mdev->peer_seq */
1919 if (peer_seq == newest_peer_seq)
1920 wake_up(&mdev->seq_wait);
1921 }
1922}
1923
1924static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1925{
1926 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1927}
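
overlaps() takes a start sector plus a length in bytes, so both lengths are shifted right by 9 to convert them to sectors before the usual "neither range ends before the other starts" test. For example:

#include <stdio.h>
#include <assert.h>

typedef unsigned long long sector_t;

/* Two ranges, given as (start sector, length in bytes), overlap unless
 * one ends at or before the start of the other. */
static int overlaps(sector_t s1, int l1, sector_t s2, int l2)
{
	return !((s1 + (l1 >> 9) <= s2) || (s1 >= s2 + (l2 >> 9)));
}

int main(void)
{
	assert(overlaps(0, 4096, 7, 512));      /* sectors 0-7 vs sector 7: overlap */
	assert(!overlaps(0, 4096, 8, 512));     /* sectors 0-7 vs sector 8: adjacent only */
	printf("ok\n");
	return 0;
}
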
1928
1929/* maybe change sync_ee into interval trees as well? */
1930static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
1931{
1932 struct drbd_peer_request *rs_req;
1611 bool rv = 0; 1933 bool rv = 0;
1612 1934
1613 spin_lock_irq(&mdev->req_lock); 1935 spin_lock_irq(&mdev->tconn->req_lock);
1614 list_for_each_entry(rs_e, &mdev->sync_ee, w.list) { 1936 list_for_each_entry(rs_req, &mdev->sync_ee, w.list) {
1615 if (overlaps(data_e->sector, data_e->size, rs_e->sector, rs_e->size)) { 1937 if (overlaps(peer_req->i.sector, peer_req->i.size,
1938 rs_req->i.sector, rs_req->i.size)) {
1616 rv = 1; 1939 rv = 1;
1617 break; 1940 break;
1618 } 1941 }
1619 } 1942 }
1620 spin_unlock_irq(&mdev->req_lock); 1943 spin_unlock_irq(&mdev->tconn->req_lock);
1621 1944
1622 return rv; 1945 return rv;
1623} 1946}
@@ -1643,35 +1966,41 @@ static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_epoch_e
1643 * 1966 *
1644 * returns 0 if we may process the packet, 1967 * returns 0 if we may process the packet,
1645 * -ERESTARTSYS if we were interrupted (by disconnect signal). */ 1968 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
1646static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) 1969static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
1647{ 1970{
1648 DEFINE_WAIT(wait); 1971 DEFINE_WAIT(wait);
1649 unsigned int p_seq;
1650 long timeout; 1972 long timeout;
1651 int ret = 0; 1973 int ret;
1974
1975 if (!need_peer_seq(mdev))
1976 return 0;
1977
1652 spin_lock(&mdev->peer_seq_lock); 1978 spin_lock(&mdev->peer_seq_lock);
1653 for (;;) { 1979 for (;;) {
1654 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE); 1980 if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
1655 if (seq_le(packet_seq, mdev->peer_seq+1)) 1981 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
1982 ret = 0;
1656 break; 1983 break;
1984 }
1657 if (signal_pending(current)) { 1985 if (signal_pending(current)) {
1658 ret = -ERESTARTSYS; 1986 ret = -ERESTARTSYS;
1659 break; 1987 break;
1660 } 1988 }
1661 p_seq = mdev->peer_seq; 1989 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
1662 spin_unlock(&mdev->peer_seq_lock); 1990 spin_unlock(&mdev->peer_seq_lock);
1663 timeout = schedule_timeout(30*HZ); 1991 rcu_read_lock();
1992 timeout = rcu_dereference(mdev->tconn->net_conf)->ping_timeo*HZ/10;
1993 rcu_read_unlock();
1994 timeout = schedule_timeout(timeout);
1664 spin_lock(&mdev->peer_seq_lock); 1995 spin_lock(&mdev->peer_seq_lock);
1665 if (timeout == 0 && p_seq == mdev->peer_seq) { 1996 if (!timeout) {
1666 ret = -ETIMEDOUT; 1997 ret = -ETIMEDOUT;
1667 dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n"); 1998 dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
1668 break; 1999 break;
1669 } 2000 }
1670 } 2001 }
1671 finish_wait(&mdev->seq_wait, &wait);
1672 if (mdev->peer_seq+1 == packet_seq)
1673 mdev->peer_seq++;
1674 spin_unlock(&mdev->peer_seq_lock); 2002 spin_unlock(&mdev->peer_seq_lock);
2003 finish_wait(&mdev->seq_wait, &wait);
1675 return ret; 2004 return ret;
1676} 2005}
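
wait_for_and_update_peer_seq() makes the receiver wait until mdev->peer_seq has caught up to peer_seq - 1, i.e. until all earlier sequence numbers have been seen, and now times out after the configured ping timeout instead of a fixed 30 seconds. A rough userspace analog with a mutex/condvar pair; the helper names and the timed-wait plumbing are invented for the sketch:

#include <pthread.h>
#include <stdint.h>
#include <errno.h>
#include <time.h>

static pthread_mutex_t seq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  seq_cond = PTHREAD_COND_INITIALIZER;
static uint32_t peer_seq;        /* highest sequence number seen so far */

static int seq_greater(uint32_t a, uint32_t b) { return (int32_t)(a - b) > 0; }
static uint32_t seq_max(uint32_t a, uint32_t b) { return seq_greater(a, b) ? a : b; }

/* Called when a packet carrying seq arrives. */
static void update_peer_seq(uint32_t seq)
{
	pthread_mutex_lock(&seq_lock);
	peer_seq = seq_max(peer_seq, seq);
	pthread_mutex_unlock(&seq_lock);
	pthread_cond_broadcast(&seq_cond);
}

/* Wait until all packets older than this_seq have been seen, or time out. */
static int wait_for_peer_seq(uint32_t this_seq, unsigned timeout_sec)
{
	struct timespec ts;
	int ret = 0;

	clock_gettime(CLOCK_REALTIME, &ts);
	ts.tv_sec += timeout_sec;

	pthread_mutex_lock(&seq_lock);
	while (seq_greater(this_seq - 1, peer_seq)) {
		ret = pthread_cond_timedwait(&seq_cond, &seq_lock, &ts);
		if (ret == ETIMEDOUT) {
			ret = -ETIMEDOUT;       /* the kernel code forces a disconnect here */
			break;
		}
	}
	if (ret == 0)
		peer_seq = seq_max(peer_seq, this_seq);
	pthread_mutex_unlock(&seq_lock);
	return ret;
}

int main(void)
{
	update_peer_seq(41);
	return wait_for_peer_seq(42, 1);        /* 41 already seen, returns immediately */
}
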
1677 2006
@@ -1686,233 +2015,277 @@ static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
1686 (dpf & DP_DISCARD ? REQ_DISCARD : 0); 2015 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
1687} 2016}
1688 2017
2018static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
2019 unsigned int size)
2020{
2021 struct drbd_interval *i;
2022
2023 repeat:
2024 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
2025 struct drbd_request *req;
2026 struct bio_and_error m;
2027
2028 if (!i->local)
2029 continue;
2030 req = container_of(i, struct drbd_request, i);
2031 if (!(req->rq_state & RQ_POSTPONED))
2032 continue;
2033 req->rq_state &= ~RQ_POSTPONED;
2034 __req_mod(req, NEG_ACKED, &m);
2035 spin_unlock_irq(&mdev->tconn->req_lock);
2036 if (m.bio)
2037 complete_master_bio(mdev, &m);
2038 spin_lock_irq(&mdev->tconn->req_lock);
2039 goto repeat;
2040 }
2041}
2042
2043static int handle_write_conflicts(struct drbd_conf *mdev,
2044 struct drbd_peer_request *peer_req)
2045{
2046 struct drbd_tconn *tconn = mdev->tconn;
2047 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &tconn->flags);
2048 sector_t sector = peer_req->i.sector;
2049 const unsigned int size = peer_req->i.size;
2050 struct drbd_interval *i;
2051 bool equal;
2052 int err;
2053
2054 /*
2055 * Inserting the peer request into the write_requests tree will prevent
2056 * new conflicting local requests from being added.
2057 */
2058 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
2059
2060 repeat:
2061 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
2062 if (i == &peer_req->i)
2063 continue;
2064
2065 if (!i->local) {
2066 /*
2067 * Our peer has sent a conflicting remote request; this
2068 * should not happen in a two-node setup. Wait for the
2069 * earlier peer request to complete.
2070 */
2071 err = drbd_wait_misc(mdev, i);
2072 if (err)
2073 goto out;
2074 goto repeat;
2075 }
2076
2077 equal = i->sector == sector && i->size == size;
2078 if (resolve_conflicts) {
2079 /*
2080 * If the peer request is fully contained within the
2081 * overlapping request, it can be considered overwritten
2082 * and thus superseded; otherwise, it will be retried
2083 * once all overlapping requests have completed.
2084 */
2085 bool superseded = i->sector <= sector && i->sector +
2086 (i->size >> 9) >= sector + (size >> 9);
2087
2088 if (!equal)
2089 dev_alert(DEV, "Concurrent writes detected: "
2090 "local=%llus +%u, remote=%llus +%u, "
2091 "assuming %s came first\n",
2092 (unsigned long long)i->sector, i->size,
2093 (unsigned long long)sector, size,
2094 superseded ? "local" : "remote");
2095
2096 inc_unacked(mdev);
2097 peer_req->w.cb = superseded ? e_send_superseded :
2098 e_send_retry_write;
2099 list_add_tail(&peer_req->w.list, &mdev->done_ee);
2100 wake_asender(mdev->tconn);
2101
2102 err = -ENOENT;
2103 goto out;
2104 } else {
2105 struct drbd_request *req =
2106 container_of(i, struct drbd_request, i);
2107
2108 if (!equal)
2109 dev_alert(DEV, "Concurrent writes detected: "
2110 "local=%llus +%u, remote=%llus +%u\n",
2111 (unsigned long long)i->sector, i->size,
2112 (unsigned long long)sector, size);
2113
2114 if (req->rq_state & RQ_LOCAL_PENDING ||
2115 !(req->rq_state & RQ_POSTPONED)) {
2116 /*
2117 * Wait for the node with the discard flag to
2118 * decide if this request has been superseded
2119 * or needs to be retried.
2120 * Requests that have been superseded will
2121 * disappear from the write_requests tree.
2122 *
2123 * In addition, wait for the conflicting
2124 * request to finish locally before submitting
2125 * the conflicting peer request.
2126 */
2127 err = drbd_wait_misc(mdev, &req->i);
2128 if (err) {
2129 _conn_request_state(mdev->tconn,
2130 NS(conn, C_TIMEOUT),
2131 CS_HARD);
2132 fail_postponed_requests(mdev, sector, size);
2133 goto out;
2134 }
2135 goto repeat;
2136 }
2137 /*
2138 * Remember to restart the conflicting requests after
2139 * the new peer request has completed.
2140 */
2141 peer_req->flags |= EE_RESTART_REQUESTS;
2142 }
2143 }
2144 err = 0;
2145
2146 out:
2147 if (err)
2148 drbd_remove_epoch_entry_interval(mdev, peer_req);
2149 return err;
2150}
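
In handle_write_conflicts(), when the resolve-conflicts flag is set, a conflicting local request only counts as superseded if it completely covers the incoming peer write; partial overlaps get retried instead. The containment test in isolation:

#include <stdio.h>
#include <stdbool.h>
#include <assert.h>

typedef unsigned long long sector_t;

/* True if the existing interval (i_sector, i_size bytes) fully contains
 * the peer write (sector, size bytes); sizes are converted to sectors. */
static bool fully_contains(sector_t i_sector, unsigned i_size,
			   sector_t sector, unsigned size)
{
	return i_sector <= sector &&
	       i_sector + (i_size >> 9) >= sector + (size >> 9);
}

int main(void)
{
	assert(fully_contains(0, 8192, 2, 1024));   /* sectors 0-15 contain 2-3: superseded */
	assert(!fully_contains(0, 8192, 14, 2048)); /* 14-17 spills past 15: retried */
	printf("ok\n");
	return 0;
}
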
2151
1689/* mirrored write */ 2152/* mirrored write */
1690static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 2153static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
1691{ 2154{
2155 struct drbd_conf *mdev;
1692 sector_t sector; 2156 sector_t sector;
1693 struct drbd_epoch_entry *e; 2157 struct drbd_peer_request *peer_req;
1694 struct p_data *p = &mdev->data.rbuf.data; 2158 struct p_data *p = pi->data;
2159 u32 peer_seq = be32_to_cpu(p->seq_num);
1695 int rw = WRITE; 2160 int rw = WRITE;
1696 u32 dp_flags; 2161 u32 dp_flags;
2162 int err, tp;
1697 2163
1698 if (!get_ldev(mdev)) { 2164 mdev = vnr_to_mdev(tconn, pi->vnr);
1699 spin_lock(&mdev->peer_seq_lock); 2165 if (!mdev)
1700 if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num)) 2166 return -EIO;
1701 mdev->peer_seq++;
1702 spin_unlock(&mdev->peer_seq_lock);
1703 2167
1704 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size); 2168 if (!get_ldev(mdev)) {
1705 atomic_inc(&mdev->current_epoch->epoch_size); 2169 int err2;
1706 return drbd_drain_block(mdev, data_size); 2170
2171 err = wait_for_and_update_peer_seq(mdev, peer_seq);
2172 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
2173 atomic_inc(&tconn->current_epoch->epoch_size);
2174 err2 = drbd_drain_block(mdev, pi->size);
2175 if (!err)
2176 err = err2;
2177 return err;
1707 } 2178 }
1708 2179
1709 /* get_ldev(mdev) successful. 2180 /*
1710 * Corresponding put_ldev done either below (on various errors), 2181 * Corresponding put_ldev done either below (on various errors), or in
1711 * or in drbd_endio_write_sec, if we successfully submit the data at 2182 * drbd_peer_request_endio, if we successfully submit the data at the
1712 * the end of this function. */ 2183 * end of this function.
2184 */
1713 2185
1714 sector = be64_to_cpu(p->sector); 2186 sector = be64_to_cpu(p->sector);
1715 e = read_in_block(mdev, p->block_id, sector, data_size); 2187 peer_req = read_in_block(mdev, p->block_id, sector, pi->size);
1716 if (!e) { 2188 if (!peer_req) {
1717 put_ldev(mdev); 2189 put_ldev(mdev);
1718 return false; 2190 return -EIO;
1719 } 2191 }
1720 2192
1721 e->w.cb = e_end_block; 2193 peer_req->w.cb = e_end_block;
1722 2194
1723 dp_flags = be32_to_cpu(p->dp_flags); 2195 dp_flags = be32_to_cpu(p->dp_flags);
1724 rw |= wire_flags_to_bio(mdev, dp_flags); 2196 rw |= wire_flags_to_bio(mdev, dp_flags);
1725 if (e->pages == NULL) { 2197 if (peer_req->pages == NULL) {
1726 D_ASSERT(e->size == 0); 2198 D_ASSERT(peer_req->i.size == 0);
1727 D_ASSERT(dp_flags & DP_FLUSH); 2199 D_ASSERT(dp_flags & DP_FLUSH);
1728 } 2200 }
1729 2201
1730 if (dp_flags & DP_MAY_SET_IN_SYNC) 2202 if (dp_flags & DP_MAY_SET_IN_SYNC)
1731 e->flags |= EE_MAY_SET_IN_SYNC; 2203 peer_req->flags |= EE_MAY_SET_IN_SYNC;
1732 2204
1733 spin_lock(&mdev->epoch_lock); 2205 spin_lock(&tconn->epoch_lock);
1734 e->epoch = mdev->current_epoch; 2206 peer_req->epoch = tconn->current_epoch;
1735 atomic_inc(&e->epoch->epoch_size); 2207 atomic_inc(&peer_req->epoch->epoch_size);
1736 atomic_inc(&e->epoch->active); 2208 atomic_inc(&peer_req->epoch->active);
1737 spin_unlock(&mdev->epoch_lock); 2209 spin_unlock(&tconn->epoch_lock);
1738 2210
1739 /* I'm the receiver, I do hold a net_cnt reference. */ 2211 rcu_read_lock();
1740 if (!mdev->net_conf->two_primaries) { 2212 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
1741 spin_lock_irq(&mdev->req_lock); 2213 rcu_read_unlock();
1742 } else { 2214 if (tp) {
1743 /* don't get the req_lock yet, 2215 peer_req->flags |= EE_IN_INTERVAL_TREE;
1744 * we may sleep in drbd_wait_peer_seq */ 2216 err = wait_for_and_update_peer_seq(mdev, peer_seq);
1745 const int size = e->size; 2217 if (err)
1746 const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags);
1747 DEFINE_WAIT(wait);
1748 struct drbd_request *i;
1749 struct hlist_node *n;
1750 struct hlist_head *slot;
1751 int first;
1752
1753 D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
1754 BUG_ON(mdev->ee_hash == NULL);
1755 BUG_ON(mdev->tl_hash == NULL);
1756
1757 /* conflict detection and handling:
1758 * 1. wait on the sequence number,
1759 * in case this data packet overtook ACK packets.
1760 * 2. check our hash tables for conflicting requests.
1761 * we only need to walk the tl_hash, since an ee can not
1762 * have a conflict with an other ee: on the submitting
1763 * node, the corresponding req had already been conflicting,
1764 * and a conflicting req is never sent.
1765 *
1766 * Note: for two_primaries, we are protocol C,
1767 * so there cannot be any request that is DONE
1768 * but still on the transfer log.
1769 *
1770 * unconditionally add to the ee_hash.
1771 *
1772 * if no conflicting request is found:
1773 * submit.
1774 *
1775 * if any conflicting request is found
1776 * that has not yet been acked,
1777 * AND I have the "discard concurrent writes" flag:
1778 * queue (via done_ee) the P_DISCARD_ACK; OUT.
1779 *
1780 * if any conflicting request is found:
1781 * block the receiver, waiting on misc_wait
1782 * until no more conflicting requests are there,
1783 * or we get interrupted (disconnect).
1784 *
1785 * we do not just write after local io completion of those
1786 * requests, but only after req is done completely, i.e.
1787 * we wait for the P_DISCARD_ACK to arrive!
1788 *
1789 * then proceed normally, i.e. submit.
1790 */
1791 if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num)))
1792 goto out_interrupted; 2218 goto out_interrupted;
1793 2219 spin_lock_irq(&mdev->tconn->req_lock);
1794 spin_lock_irq(&mdev->req_lock); 2220 err = handle_write_conflicts(mdev, peer_req);
1795 2221 if (err) {
1796 hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); 2222 spin_unlock_irq(&mdev->tconn->req_lock);
1797 2223 if (err == -ENOENT) {
1798#define OVERLAPS overlaps(i->sector, i->size, sector, size)
1799 slot = tl_hash_slot(mdev, sector);
1800 first = 1;
1801 for (;;) {
1802 int have_unacked = 0;
1803 int have_conflict = 0;
1804 prepare_to_wait(&mdev->misc_wait, &wait,
1805 TASK_INTERRUPTIBLE);
1806 hlist_for_each_entry(i, n, slot, collision) {
1807 if (OVERLAPS) {
1808 /* only ALERT on first iteration,
1809 * we may be woken up early... */
1810 if (first)
1811 dev_alert(DEV, "%s[%u] Concurrent local write detected!"
1812 " new: %llus +%u; pending: %llus +%u\n",
1813 current->comm, current->pid,
1814 (unsigned long long)sector, size,
1815 (unsigned long long)i->sector, i->size);
1816 if (i->rq_state & RQ_NET_PENDING)
1817 ++have_unacked;
1818 ++have_conflict;
1819 }
1820 }
1821#undef OVERLAPS
1822 if (!have_conflict)
1823 break;
1824
1825 /* Discard Ack only for the _first_ iteration */
1826 if (first && discard && have_unacked) {
1827 dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
1828 (unsigned long long)sector);
1829 inc_unacked(mdev);
1830 e->w.cb = e_send_discard_ack;
1831 list_add_tail(&e->w.list, &mdev->done_ee);
1832
1833 spin_unlock_irq(&mdev->req_lock);
1834
1835 /* we could probably send that P_DISCARD_ACK ourselves,
1836 * but I don't like the receiver using the msock */
1837
1838 put_ldev(mdev); 2224 put_ldev(mdev);
1839 wake_asender(mdev); 2225 return 0;
1840 finish_wait(&mdev->misc_wait, &wait);
1841 return true;
1842 } 2226 }
2227 goto out_interrupted;
2228 }
2229 } else
2230 spin_lock_irq(&mdev->tconn->req_lock);
2231 list_add(&peer_req->w.list, &mdev->active_ee);
2232 spin_unlock_irq(&mdev->tconn->req_lock);
1843 2233
1844 if (signal_pending(current)) { 2234 if (mdev->state.conn == C_SYNC_TARGET)
1845 hlist_del_init(&e->collision); 2235 wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, peer_req));
1846
1847 spin_unlock_irq(&mdev->req_lock);
1848
1849 finish_wait(&mdev->misc_wait, &wait);
1850 goto out_interrupted;
1851 }
1852 2236
1853 spin_unlock_irq(&mdev->req_lock); 2237 if (mdev->tconn->agreed_pro_version < 100) {
1854 if (first) { 2238 rcu_read_lock();
1855 first = 0; 2239 switch (rcu_dereference(mdev->tconn->net_conf)->wire_protocol) {
1856 dev_alert(DEV, "Concurrent write! [W AFTERWARDS] " 2240 case DRBD_PROT_C:
1857 "sec=%llus\n", (unsigned long long)sector); 2241 dp_flags |= DP_SEND_WRITE_ACK;
1858 } else if (discard) { 2242 break;
1859 /* we had none on the first iteration. 2243 case DRBD_PROT_B:
1860 * there must be none now. */ 2244 dp_flags |= DP_SEND_RECEIVE_ACK;
1861 D_ASSERT(have_unacked == 0); 2245 break;
1862 }
1863 schedule();
1864 spin_lock_irq(&mdev->req_lock);
1865 } 2246 }
1866 finish_wait(&mdev->misc_wait, &wait); 2247 rcu_read_unlock();
1867 } 2248 }
1868 2249
1869 list_add(&e->w.list, &mdev->active_ee); 2250 if (dp_flags & DP_SEND_WRITE_ACK) {
1870 spin_unlock_irq(&mdev->req_lock); 2251 peer_req->flags |= EE_SEND_WRITE_ACK;
1871
1872 if (mdev->state.conn == C_SYNC_TARGET)
1873 wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, e));
1874
1875 switch (mdev->net_conf->wire_protocol) {
1876 case DRBD_PROT_C:
1877 inc_unacked(mdev); 2252 inc_unacked(mdev);
1878 /* corresponding dec_unacked() in e_end_block() 2253 /* corresponding dec_unacked() in e_end_block()
1879 * respective _drbd_clear_done_ee */ 2254 * respective _drbd_clear_done_ee */
1880 break; 2255 }
1881 case DRBD_PROT_B: 2256
2257 if (dp_flags & DP_SEND_RECEIVE_ACK) {
1882 /* I really don't like it that the receiver thread 2258 /* I really don't like it that the receiver thread
1883 * sends on the msock, but anyways */ 2259 * sends on the msock, but anyways */
1884 drbd_send_ack(mdev, P_RECV_ACK, e); 2260 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
1885 break;
1886 case DRBD_PROT_A:
1887 /* nothing to do */
1888 break;
1889 } 2261 }
1890 2262
1891 if (mdev->state.pdsk < D_INCONSISTENT) { 2263 if (mdev->state.pdsk < D_INCONSISTENT) {
1892 /* In case we have the only disk of the cluster, */ 2264 /* In case we have the only disk of the cluster, */
1893 drbd_set_out_of_sync(mdev, e->sector, e->size); 2265 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
1894 e->flags |= EE_CALL_AL_COMPLETE_IO; 2266 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
1895 e->flags &= ~EE_MAY_SET_IN_SYNC; 2267 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
1896 drbd_al_begin_io(mdev, e->sector); 2268 drbd_al_begin_io(mdev, &peer_req->i);
1897 } 2269 }
1898 2270
1899 if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) 2271 err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR);
1900 return true; 2272 if (!err)
2273 return 0;
1901 2274
1902 /* don't care for the reason here */ 2275 /* don't care for the reason here */
1903 dev_err(DEV, "submit failed, triggering re-connect\n"); 2276 dev_err(DEV, "submit failed, triggering re-connect\n");
1904 spin_lock_irq(&mdev->req_lock); 2277 spin_lock_irq(&mdev->tconn->req_lock);
1905 list_del(&e->w.list); 2278 list_del(&peer_req->w.list);
1906 hlist_del_init(&e->collision); 2279 drbd_remove_epoch_entry_interval(mdev, peer_req);
1907 spin_unlock_irq(&mdev->req_lock); 2280 spin_unlock_irq(&mdev->tconn->req_lock);
1908 if (e->flags & EE_CALL_AL_COMPLETE_IO) 2281 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
1909 drbd_al_complete_io(mdev, e->sector); 2282 drbd_al_complete_io(mdev, &peer_req->i);
1910 2283
1911out_interrupted: 2284out_interrupted:
1912 drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + EV_CLEANUP); 2285 drbd_may_finish_epoch(tconn, peer_req->epoch, EV_PUT + EV_CLEANUP);
1913 put_ldev(mdev); 2286 put_ldev(mdev);
1914 drbd_free_ee(mdev, e); 2287 drbd_free_peer_req(mdev, peer_req);
1915 return false; 2288 return err;
1916} 2289}
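
For peers older than protocol 100, receive_Data() derives the ack policy from the configured wire protocol and stores it in dp_flags, so the rest of the function only looks at DP_SEND_WRITE_ACK / DP_SEND_RECEIVE_ACK. The mapping on its own (the numeric flag values below are placeholders, not DRBD's):

#include <stdio.h>

enum drbd_prot { DRBD_PROT_A = 1, DRBD_PROT_B, DRBD_PROT_C };

#define DP_SEND_RECEIVE_ACK (1u << 0)   /* protocol B: ack when the data was received */
#define DP_SEND_WRITE_ACK   (1u << 1)   /* protocol C: ack when the write reached disk */

/* Translate a pre-100 wire protocol into the per-request ack flags. */
static unsigned ack_flags_for_protocol(enum drbd_prot proto)
{
	switch (proto) {
	case DRBD_PROT_C:
		return DP_SEND_WRITE_ACK;
	case DRBD_PROT_B:
		return DP_SEND_RECEIVE_ACK;
	default:                        /* protocol A: no ack at all */
		return 0;
	}
}

int main(void)
{
	printf("%u %u %u\n",
	       ack_flags_for_protocol(DRBD_PROT_A),
	       ack_flags_for_protocol(DRBD_PROT_B),
	       ack_flags_for_protocol(DRBD_PROT_C));
	return 0;
}
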
1917 2290
1918/* We may throttle resync, if the lower device seems to be busy, 2291/* We may throttle resync, if the lower device seems to be busy,
@@ -1933,9 +2306,14 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
1933 struct lc_element *tmp; 2306 struct lc_element *tmp;
1934 int curr_events; 2307 int curr_events;
1935 int throttle = 0; 2308 int throttle = 0;
2309 unsigned int c_min_rate;
2310
2311 rcu_read_lock();
2312 c_min_rate = rcu_dereference(mdev->ldev->disk_conf)->c_min_rate;
2313 rcu_read_unlock();
1936 2314
1937 /* feature disabled? */ 2315 /* feature disabled? */
1938 if (mdev->sync_conf.c_min_rate == 0) 2316 if (c_min_rate == 0)
1939 return 0; 2317 return 0;
1940 2318
1941 spin_lock_irq(&mdev->al_lock); 2319 spin_lock_irq(&mdev->al_lock);
@@ -1975,40 +2353,46 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
1975 db = mdev->rs_mark_left[i] - rs_left; 2353 db = mdev->rs_mark_left[i] - rs_left;
1976 dbdt = Bit2KB(db/dt); 2354 dbdt = Bit2KB(db/dt);
1977 2355
1978 if (dbdt > mdev->sync_conf.c_min_rate) 2356 if (dbdt > c_min_rate)
1979 throttle = 1; 2357 throttle = 1;
1980 } 2358 }
1981 return throttle; 2359 return throttle;
1982} 2360}
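
drbd_rs_should_slow_down() now reads c_min_rate from the RCU-protected disk_conf, but the decision is unchanged: estimate the achieved resync rate from the bitmap bits cleared since the last mark and throttle once it exceeds the configured minimum. The arithmetic alone, assuming DRBD's 4 KiB bitmap granularity for Bit2KB():

#include <stdio.h>

#define Bit2KB(bits) ((bits) << 2)      /* one bitmap bit covers 4 KiB */

/* Return 1 if the observed resync rate already exceeds c_min_rate (KiB/s). */
static int should_throttle(unsigned long bits_synced, unsigned long interval_jiffies,
			   unsigned long hz, unsigned int c_min_rate)
{
	unsigned long dt = interval_jiffies / hz;   /* interval length in seconds */
	unsigned long dbdt;

	if (c_min_rate == 0)
		return 0;                           /* feature disabled */
	if (!dt)
		dt = 1;
	dbdt = Bit2KB(bits_synced / dt);            /* KiB per second */
	return dbdt > c_min_rate;
}

int main(void)
{
	/* 100000 bits (~390 MiB) in 10 s is ~40000 KiB/s, above a 250 KiB/s floor */
	printf("%d\n", should_throttle(100000, 10 * 250, 250, 250));
	return 0;
}
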
1983 2361
1984 2362
1985static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int digest_size) 2363static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi)
1986{ 2364{
2365 struct drbd_conf *mdev;
1987 sector_t sector; 2366 sector_t sector;
1988 const sector_t capacity = drbd_get_capacity(mdev->this_bdev); 2367 sector_t capacity;
1989 struct drbd_epoch_entry *e; 2368 struct drbd_peer_request *peer_req;
1990 struct digest_info *di = NULL; 2369 struct digest_info *di = NULL;
1991 int size, verb; 2370 int size, verb;
1992 unsigned int fault_type; 2371 unsigned int fault_type;
1993 struct p_block_req *p = &mdev->data.rbuf.block_req; 2372 struct p_block_req *p = pi->data;
2373
2374 mdev = vnr_to_mdev(tconn, pi->vnr);
2375 if (!mdev)
2376 return -EIO;
2377 capacity = drbd_get_capacity(mdev->this_bdev);
1994 2378
1995 sector = be64_to_cpu(p->sector); 2379 sector = be64_to_cpu(p->sector);
1996 size = be32_to_cpu(p->blksize); 2380 size = be32_to_cpu(p->blksize);
1997 2381
1998 if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { 2382 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
1999 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, 2383 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2000 (unsigned long long)sector, size); 2384 (unsigned long long)sector, size);
2001 return false; 2385 return -EINVAL;
2002 } 2386 }
2003 if (sector + (size>>9) > capacity) { 2387 if (sector + (size>>9) > capacity) {
2004 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, 2388 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2005 (unsigned long long)sector, size); 2389 (unsigned long long)sector, size);
2006 return false; 2390 return -EINVAL;
2007 } 2391 }
2008 2392
2009 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) { 2393 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
2010 verb = 1; 2394 verb = 1;
2011 switch (cmd) { 2395 switch (pi->cmd) {
2012 case P_DATA_REQUEST: 2396 case P_DATA_REQUEST:
2013 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p); 2397 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2014 break; 2398 break;
@@ -2023,35 +2407,34 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
2023 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC); 2407 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2024 break; 2408 break;
2025 default: 2409 default:
2026 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n", 2410 BUG();
2027 cmdname(cmd));
2028 } 2411 }
2029 if (verb && __ratelimit(&drbd_ratelimit_state)) 2412 if (verb && __ratelimit(&drbd_ratelimit_state))
2030 dev_err(DEV, "Can not satisfy peer's read request, " 2413 dev_err(DEV, "Can not satisfy peer's read request, "
2031 "no local data.\n"); 2414 "no local data.\n");
2032 2415
2033 /* drain the possible payload */ 2416 /* drain the possible payload */
2034 return drbd_drain_block(mdev, digest_size); 2417 return drbd_drain_block(mdev, pi->size);
2035 } 2418 }
2036 2419
2037 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD 2420 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2038 * "criss-cross" setup, that might cause write-out on some other DRBD, 2421 * "criss-cross" setup, that might cause write-out on some other DRBD,
2039 * which in turn might block on the other node at this very place. */ 2422 * which in turn might block on the other node at this very place. */
2040 e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO); 2423 peer_req = drbd_alloc_peer_req(mdev, p->block_id, sector, size, GFP_NOIO);
2041 if (!e) { 2424 if (!peer_req) {
2042 put_ldev(mdev); 2425 put_ldev(mdev);
2043 return false; 2426 return -ENOMEM;
2044 } 2427 }
2045 2428
2046 switch (cmd) { 2429 switch (pi->cmd) {
2047 case P_DATA_REQUEST: 2430 case P_DATA_REQUEST:
2048 e->w.cb = w_e_end_data_req; 2431 peer_req->w.cb = w_e_end_data_req;
2049 fault_type = DRBD_FAULT_DT_RD; 2432 fault_type = DRBD_FAULT_DT_RD;
2050 /* application IO, don't drbd_rs_begin_io */ 2433 /* application IO, don't drbd_rs_begin_io */
2051 goto submit; 2434 goto submit;
2052 2435
2053 case P_RS_DATA_REQUEST: 2436 case P_RS_DATA_REQUEST:
2054 e->w.cb = w_e_end_rsdata_req; 2437 peer_req->w.cb = w_e_end_rsdata_req;
2055 fault_type = DRBD_FAULT_RS_RD; 2438 fault_type = DRBD_FAULT_RS_RD;
2056 /* used in the sector offset progress display */ 2439 /* used in the sector offset progress display */
2057 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); 2440 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
@@ -2060,28 +2443,28 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
2060 case P_OV_REPLY: 2443 case P_OV_REPLY:
2061 case P_CSUM_RS_REQUEST: 2444 case P_CSUM_RS_REQUEST:
2062 fault_type = DRBD_FAULT_RS_RD; 2445 fault_type = DRBD_FAULT_RS_RD;
2063 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO); 2446 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
2064 if (!di) 2447 if (!di)
2065 goto out_free_e; 2448 goto out_free_e;
2066 2449
2067 di->digest_size = digest_size; 2450 di->digest_size = pi->size;
2068 di->digest = (((char *)di)+sizeof(struct digest_info)); 2451 di->digest = (((char *)di)+sizeof(struct digest_info));
2069 2452
2070 e->digest = di; 2453 peer_req->digest = di;
2071 e->flags |= EE_HAS_DIGEST; 2454 peer_req->flags |= EE_HAS_DIGEST;
2072 2455
2073 if (drbd_recv(mdev, di->digest, digest_size) != digest_size) 2456 if (drbd_recv_all(mdev->tconn, di->digest, pi->size))
2074 goto out_free_e; 2457 goto out_free_e;
2075 2458
2076 if (cmd == P_CSUM_RS_REQUEST) { 2459 if (pi->cmd == P_CSUM_RS_REQUEST) {
2077 D_ASSERT(mdev->agreed_pro_version >= 89); 2460 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
2078 e->w.cb = w_e_end_csum_rs_req; 2461 peer_req->w.cb = w_e_end_csum_rs_req;
2079 /* used in the sector offset progress display */ 2462 /* used in the sector offset progress display */
2080 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); 2463 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
2081 } else if (cmd == P_OV_REPLY) { 2464 } else if (pi->cmd == P_OV_REPLY) {
2082 /* track progress, we may need to throttle */ 2465 /* track progress, we may need to throttle */
2083 atomic_add(size >> 9, &mdev->rs_sect_in); 2466 atomic_add(size >> 9, &mdev->rs_sect_in);
2084 e->w.cb = w_e_end_ov_reply; 2467 peer_req->w.cb = w_e_end_ov_reply;
2085 dec_rs_pending(mdev); 2468 dec_rs_pending(mdev);
2086 /* drbd_rs_begin_io done when we sent this request, 2469 /* drbd_rs_begin_io done when we sent this request,
2087 * but accounting still needs to be done. */ 2470 * but accounting still needs to be done. */
@@ -2091,7 +2474,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
2091 2474
2092 case P_OV_REQUEST: 2475 case P_OV_REQUEST:
2093 if (mdev->ov_start_sector == ~(sector_t)0 && 2476 if (mdev->ov_start_sector == ~(sector_t)0 &&
2094 mdev->agreed_pro_version >= 90) { 2477 mdev->tconn->agreed_pro_version >= 90) {
2095 unsigned long now = jiffies; 2478 unsigned long now = jiffies;
2096 int i; 2479 int i;
2097 mdev->ov_start_sector = sector; 2480 mdev->ov_start_sector = sector;
@@ -2105,15 +2488,12 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
2105 dev_info(DEV, "Online Verify start sector: %llu\n", 2488 dev_info(DEV, "Online Verify start sector: %llu\n",
2106 (unsigned long long)sector); 2489 (unsigned long long)sector);
2107 } 2490 }
2108 e->w.cb = w_e_end_ov_req; 2491 peer_req->w.cb = w_e_end_ov_req;
2109 fault_type = DRBD_FAULT_RS_RD; 2492 fault_type = DRBD_FAULT_RS_RD;
2110 break; 2493 break;
2111 2494
2112 default: 2495 default:
2113 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n", 2496 BUG();
2114 cmdname(cmd));
2115 fault_type = DRBD_FAULT_MAX;
2116 goto out_free_e;
2117 } 2497 }
2118 2498
2119 /* Throttle, drbd_rs_begin_io and submit should become asynchronous 2499 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
@@ -2148,30 +2528,31 @@ submit_for_resync:
2148 2528
2149submit: 2529submit:
2150 inc_unacked(mdev); 2530 inc_unacked(mdev);
2151 spin_lock_irq(&mdev->req_lock); 2531 spin_lock_irq(&mdev->tconn->req_lock);
2152 list_add_tail(&e->w.list, &mdev->read_ee); 2532 list_add_tail(&peer_req->w.list, &mdev->read_ee);
2153 spin_unlock_irq(&mdev->req_lock); 2533 spin_unlock_irq(&mdev->tconn->req_lock);
2154 2534
2155 if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) 2535 if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
2156 return true; 2536 return 0;
2157 2537
2158 /* don't care for the reason here */ 2538 /* don't care for the reason here */
2159 dev_err(DEV, "submit failed, triggering re-connect\n"); 2539 dev_err(DEV, "submit failed, triggering re-connect\n");
2160 spin_lock_irq(&mdev->req_lock); 2540 spin_lock_irq(&mdev->tconn->req_lock);
2161 list_del(&e->w.list); 2541 list_del(&peer_req->w.list);
2162 spin_unlock_irq(&mdev->req_lock); 2542 spin_unlock_irq(&mdev->tconn->req_lock);
2163 /* no drbd_rs_complete_io(), we are dropping the connection anyways */ 2543 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2164 2544
2165out_free_e: 2545out_free_e:
2166 put_ldev(mdev); 2546 put_ldev(mdev);
2167 drbd_free_ee(mdev, e); 2547 drbd_free_peer_req(mdev, peer_req);
2168 return false; 2548 return -EIO;
2169} 2549}
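
A note on the P_CSUM_RS_REQUEST/P_OV_REPLY branch above: the digest_info header and its variable-sized digest payload are carved out of a single kmalloc(), with di->digest pointing just past the struct. The following is only a sketch of that layout under assumed names ("my_digest_info" stands in for the driver's struct digest_info), not the driver's exact code:

#include <linux/slab.h>

/* Sketch only: one allocation holding a fixed header plus a
 * variable-sized digest, mirroring the branch above. */
struct my_digest_info {
        int digest_size;
        void *digest;
};

static struct my_digest_info *alloc_digest(unsigned int size)
{
        struct my_digest_info *di;

        di = kmalloc(sizeof(*di) + size, GFP_NOIO);
        if (!di)
                return NULL;
        di->digest_size = size;
        di->digest = (char *)di + sizeof(*di); /* payload sits right after the header */
        return di;
}
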
2170 2550
2171static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) 2551static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2172{ 2552{
2173 int self, peer, rv = -100; 2553 int self, peer, rv = -100;
2174 unsigned long ch_self, ch_peer; 2554 unsigned long ch_self, ch_peer;
2555 enum drbd_after_sb_p after_sb_0p;
2175 2556
2176 self = mdev->ldev->md.uuid[UI_BITMAP] & 1; 2557 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2177 peer = mdev->p_uuid[UI_BITMAP] & 1; 2558 peer = mdev->p_uuid[UI_BITMAP] & 1;
@@ -2179,10 +2560,14 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2179 ch_peer = mdev->p_uuid[UI_SIZE]; 2560 ch_peer = mdev->p_uuid[UI_SIZE];
2180 ch_self = mdev->comm_bm_set; 2561 ch_self = mdev->comm_bm_set;
2181 2562
2182 switch (mdev->net_conf->after_sb_0p) { 2563 rcu_read_lock();
2564 after_sb_0p = rcu_dereference(mdev->tconn->net_conf)->after_sb_0p;
2565 rcu_read_unlock();
2566 switch (after_sb_0p) {
2183 case ASB_CONSENSUS: 2567 case ASB_CONSENSUS:
2184 case ASB_DISCARD_SECONDARY: 2568 case ASB_DISCARD_SECONDARY:
2185 case ASB_CALL_HELPER: 2569 case ASB_CALL_HELPER:
2570 case ASB_VIOLENTLY:
2186 dev_err(DEV, "Configuration error.\n"); 2571 dev_err(DEV, "Configuration error.\n");
2187 break; 2572 break;
2188 case ASB_DISCONNECT: 2573 case ASB_DISCONNECT:
@@ -2211,14 +2596,14 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2211 "Using discard-least-changes instead\n"); 2596 "Using discard-least-changes instead\n");
2212 case ASB_DISCARD_ZERO_CHG: 2597 case ASB_DISCARD_ZERO_CHG:
2213 if (ch_peer == 0 && ch_self == 0) { 2598 if (ch_peer == 0 && ch_self == 0) {
2214 rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) 2599 rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
2215 ? -1 : 1; 2600 ? -1 : 1;
2216 break; 2601 break;
2217 } else { 2602 } else {
2218 if (ch_peer == 0) { rv = 1; break; } 2603 if (ch_peer == 0) { rv = 1; break; }
2219 if (ch_self == 0) { rv = -1; break; } 2604 if (ch_self == 0) { rv = -1; break; }
2220 } 2605 }
2221 if (mdev->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG) 2606 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
2222 break; 2607 break;
2223 case ASB_DISCARD_LEAST_CHG: 2608 case ASB_DISCARD_LEAST_CHG:
2224 if (ch_self < ch_peer) 2609 if (ch_self < ch_peer)
@@ -2227,7 +2612,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2227 rv = 1; 2612 rv = 1;
2228 else /* ( ch_self == ch_peer ) */ 2613 else /* ( ch_self == ch_peer ) */
2229 /* Well, then use something else. */ 2614 /* Well, then use something else. */
2230 rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) 2615 rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
2231 ? -1 : 1; 2616 ? -1 : 1;
2232 break; 2617 break;
2233 case ASB_DISCARD_LOCAL: 2618 case ASB_DISCARD_LOCAL:
@@ -2243,13 +2628,18 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2243static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) 2628static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2244{ 2629{
2245 int hg, rv = -100; 2630 int hg, rv = -100;
2631 enum drbd_after_sb_p after_sb_1p;
2246 2632
2247 switch (mdev->net_conf->after_sb_1p) { 2633 rcu_read_lock();
2634 after_sb_1p = rcu_dereference(mdev->tconn->net_conf)->after_sb_1p;
2635 rcu_read_unlock();
2636 switch (after_sb_1p) {
2248 case ASB_DISCARD_YOUNGER_PRI: 2637 case ASB_DISCARD_YOUNGER_PRI:
2249 case ASB_DISCARD_OLDER_PRI: 2638 case ASB_DISCARD_OLDER_PRI:
2250 case ASB_DISCARD_LEAST_CHG: 2639 case ASB_DISCARD_LEAST_CHG:
2251 case ASB_DISCARD_LOCAL: 2640 case ASB_DISCARD_LOCAL:
2252 case ASB_DISCARD_REMOTE: 2641 case ASB_DISCARD_REMOTE:
2642 case ASB_DISCARD_ZERO_CHG:
2253 dev_err(DEV, "Configuration error.\n"); 2643 dev_err(DEV, "Configuration error.\n");
2254 break; 2644 break;
2255 case ASB_DISCONNECT: 2645 case ASB_DISCONNECT:
@@ -2292,8 +2682,12 @@ static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2292static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) 2682static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2293{ 2683{
2294 int hg, rv = -100; 2684 int hg, rv = -100;
2685 enum drbd_after_sb_p after_sb_2p;
2295 2686
2296 switch (mdev->net_conf->after_sb_2p) { 2687 rcu_read_lock();
2688 after_sb_2p = rcu_dereference(mdev->tconn->net_conf)->after_sb_2p;
2689 rcu_read_unlock();
2690 switch (after_sb_2p) {
2297 case ASB_DISCARD_YOUNGER_PRI: 2691 case ASB_DISCARD_YOUNGER_PRI:
2298 case ASB_DISCARD_OLDER_PRI: 2692 case ASB_DISCARD_OLDER_PRI:
2299 case ASB_DISCARD_LEAST_CHG: 2693 case ASB_DISCARD_LEAST_CHG:
@@ -2301,6 +2695,7 @@ static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2301 case ASB_DISCARD_REMOTE: 2695 case ASB_DISCARD_REMOTE:
2302 case ASB_CONSENSUS: 2696 case ASB_CONSENSUS:
2303 case ASB_DISCARD_SECONDARY: 2697 case ASB_DISCARD_SECONDARY:
2698 case ASB_DISCARD_ZERO_CHG:
2304 dev_err(DEV, "Configuration error.\n"); 2699 dev_err(DEV, "Configuration error.\n");
2305 break; 2700 break;
2306 case ASB_VIOLENTLY: 2701 case ASB_VIOLENTLY:
@@ -2386,13 +2781,15 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
2386 2781
2387 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) { 2782 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2388 2783
2389 if (mdev->agreed_pro_version < 91) 2784 if (mdev->tconn->agreed_pro_version < 91)
2390 return -1091; 2785 return -1091;
2391 2786
2392 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) && 2787 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2393 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) { 2788 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2394 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n"); 2789 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2395 drbd_uuid_set_bm(mdev, 0UL); 2790 drbd_uuid_move_history(mdev);
2791 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
2792 mdev->ldev->md.uuid[UI_BITMAP] = 0;
2396 2793
2397 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, 2794 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2398 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0); 2795 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
@@ -2407,7 +2804,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
2407 2804
2408 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) { 2805 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2409 2806
2410 if (mdev->agreed_pro_version < 91) 2807 if (mdev->tconn->agreed_pro_version < 91)
2411 return -1091; 2808 return -1091;
2412 2809
2413 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) && 2810 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
@@ -2440,7 +2837,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
2440 case 1: /* self_pri && !peer_pri */ return 1; 2837 case 1: /* self_pri && !peer_pri */ return 1;
2441 case 2: /* !self_pri && peer_pri */ return -1; 2838 case 2: /* !self_pri && peer_pri */ return -1;
2442 case 3: /* self_pri && peer_pri */ 2839 case 3: /* self_pri && peer_pri */
2443 dc = test_bit(DISCARD_CONCURRENT, &mdev->flags); 2840 dc = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags);
2444 return dc ? -1 : 1; 2841 return dc ? -1 : 1;
2445 } 2842 }
2446 } 2843 }
@@ -2453,14 +2850,14 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
2453 *rule_nr = 51; 2850 *rule_nr = 51;
2454 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); 2851 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2455 if (self == peer) { 2852 if (self == peer) {
2456 if (mdev->agreed_pro_version < 96 ? 2853 if (mdev->tconn->agreed_pro_version < 96 ?
2457 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == 2854 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2458 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) : 2855 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2459 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) { 2856 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
2460 /* The last P_SYNC_UUID did not get through. Undo the last start of 2857 /* The last P_SYNC_UUID did not get through. Undo the last start of
2461 resync as sync source modifications of the peer's UUIDs. */ 2858 resync as sync source modifications of the peer's UUIDs. */
2462 2859
2463 if (mdev->agreed_pro_version < 91) 2860 if (mdev->tconn->agreed_pro_version < 91)
2464 return -1091; 2861 return -1091;
2465 2862
2466 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; 2863 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
@@ -2490,18 +2887,18 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
2490 *rule_nr = 71; 2887 *rule_nr = 71;
2491 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); 2888 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2492 if (self == peer) { 2889 if (self == peer) {
2493 if (mdev->agreed_pro_version < 96 ? 2890 if (mdev->tconn->agreed_pro_version < 96 ?
2494 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == 2891 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2495 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) : 2892 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2496 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) { 2893 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
2497 /* The last P_SYNC_UUID did not get through. Undo the last start of 2894 /* The last P_SYNC_UUID did not get through. Undo the last start of
2498 resync as sync source modifications of our UUIDs. */ 2895 resync as sync source modifications of our UUIDs. */
2499 2896
2500 if (mdev->agreed_pro_version < 91) 2897 if (mdev->tconn->agreed_pro_version < 91)
2501 return -1091; 2898 return -1091;
2502 2899
2503 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]); 2900 __drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2504 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]); 2901 __drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2505 2902
2506 dev_info(DEV, "Last syncUUID did not get through, corrected:\n"); 2903 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
2507 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, 2904 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
@@ -2545,20 +2942,24 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
2545static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role, 2942static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2546 enum drbd_disk_state peer_disk) __must_hold(local) 2943 enum drbd_disk_state peer_disk) __must_hold(local)
2547{ 2944{
2548 int hg, rule_nr;
2549 enum drbd_conns rv = C_MASK; 2945 enum drbd_conns rv = C_MASK;
2550 enum drbd_disk_state mydisk; 2946 enum drbd_disk_state mydisk;
2947 struct net_conf *nc;
2948 int hg, rule_nr, rr_conflict, tentative;
2551 2949
2552 mydisk = mdev->state.disk; 2950 mydisk = mdev->state.disk;
2553 if (mydisk == D_NEGOTIATING) 2951 if (mydisk == D_NEGOTIATING)
2554 mydisk = mdev->new_state_tmp.disk; 2952 mydisk = mdev->new_state_tmp.disk;
2555 2953
2556 dev_info(DEV, "drbd_sync_handshake:\n"); 2954 dev_info(DEV, "drbd_sync_handshake:\n");
2955
2956 spin_lock_irq(&mdev->ldev->md.uuid_lock);
2557 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0); 2957 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2558 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, 2958 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2559 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); 2959 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2560 2960
2561 hg = drbd_uuid_compare(mdev, &rule_nr); 2961 hg = drbd_uuid_compare(mdev, &rule_nr);
2962 spin_unlock_irq(&mdev->ldev->md.uuid_lock);
2562 2963
2563 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr); 2964 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2564 2965
@@ -2584,7 +2985,10 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
2584 if (abs(hg) == 100) 2985 if (abs(hg) == 100)
2585 drbd_khelper(mdev, "initial-split-brain"); 2986 drbd_khelper(mdev, "initial-split-brain");
2586 2987
2587 if (hg == 100 || (hg == -100 && mdev->net_conf->always_asbp)) { 2988 rcu_read_lock();
2989 nc = rcu_dereference(mdev->tconn->net_conf);
2990
2991 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
2588 int pcount = (mdev->state.role == R_PRIMARY) 2992 int pcount = (mdev->state.role == R_PRIMARY)
2589 + (peer_role == R_PRIMARY); 2993 + (peer_role == R_PRIMARY);
2590 int forced = (hg == -100); 2994 int forced = (hg == -100);
@@ -2613,9 +3017,9 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
2613 } 3017 }
2614 3018
2615 if (hg == -100) { 3019 if (hg == -100) {
2616 if (mdev->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1)) 3020 if (test_bit(DISCARD_MY_DATA, &mdev->flags) && !(mdev->p_uuid[UI_FLAGS]&1))
2617 hg = -1; 3021 hg = -1;
2618 if (!mdev->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1)) 3022 if (!test_bit(DISCARD_MY_DATA, &mdev->flags) && (mdev->p_uuid[UI_FLAGS]&1))
2619 hg = 1; 3023 hg = 1;
2620 3024
2621 if (abs(hg) < 100) 3025 if (abs(hg) < 100)
@@ -2623,6 +3027,9 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
2623 "Sync from %s node\n", 3027 "Sync from %s node\n",
2624 (hg < 0) ? "peer" : "this"); 3028 (hg < 0) ? "peer" : "this");
2625 } 3029 }
3030 rr_conflict = nc->rr_conflict;
3031 tentative = nc->tentative;
3032 rcu_read_unlock();
2626 3033
2627 if (hg == -100) { 3034 if (hg == -100) {
2628 /* FIXME this log message is not correct if we end up here 3035 /* FIXME this log message is not correct if we end up here
@@ -2641,7 +3048,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
2641 3048
2642 if (hg < 0 && /* by intention we do not use mydisk here. */ 3049 if (hg < 0 && /* by intention we do not use mydisk here. */
2643 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) { 3050 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
2644 switch (mdev->net_conf->rr_conflict) { 3051 switch (rr_conflict) {
2645 case ASB_CALL_HELPER: 3052 case ASB_CALL_HELPER:
2646 drbd_khelper(mdev, "pri-lost"); 3053 drbd_khelper(mdev, "pri-lost");
2647 /* fall through */ 3054 /* fall through */
@@ -2654,7 +3061,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
2654 } 3061 }
2655 } 3062 }
2656 3063
2657 if (mdev->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) { 3064 if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
2658 if (hg == 0) 3065 if (hg == 0)
2659 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n"); 3066 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2660 else 3067 else
@@ -2686,33 +3093,29 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
2686 return rv; 3093 return rv;
2687} 3094}
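
drbd_sync_handshake() now reads net_conf only under rcu_read_lock() and copies the scalars it needs (always_asbp, rr_conflict, tentative) into locals before unlocking, so the decision logic never dereferences the RCU-protected pointer outside the read-side critical section. A minimal sketch of that read-and-copy pattern, using the same tconn->net_conf pointer as above:

struct net_conf *nc;
int rr_conflict, tentative;

rcu_read_lock();
nc = rcu_dereference(tconn->net_conf);
rr_conflict = nc->rr_conflict;   /* copy plain scalars out ...           */
tentative   = nc->tentative;     /* ... while the pointer is still valid */
rcu_read_unlock();
/* from here on only the copied values are used; nc must not be touched */
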
2688 3095
2689/* returns 1 if invalid */ 3096static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
2690static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2691{ 3097{
2692 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */ 3098 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2693 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) || 3099 if (peer == ASB_DISCARD_REMOTE)
2694 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL)) 3100 return ASB_DISCARD_LOCAL;
2695 return 0;
2696 3101
2697 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */ 3102 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2698 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL || 3103 if (peer == ASB_DISCARD_LOCAL)
2699 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL) 3104 return ASB_DISCARD_REMOTE;
2700 return 1;
2701 3105
2702 /* everything else is valid if they are equal on both sides. */ 3106 /* everything else is valid if they are equal on both sides. */
2703 if (peer == self) 3107 return peer;
2704 return 0;
2705
2706 /* everything else is invalid. */
2707 return 1;
2708} 3108}
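
convert_after_sb() replaces the old cmp_after_sb() validity check: instead of enumerating valid and invalid pairings, the peer's after-split-brain setting is translated into the local point of view and then compared for equality. A sketch of how the check in receive_protocol() below uses it:

/* The peer's ASB_DISCARD_REMOTE ("discard the remote node's data") is,
 * seen from this node, a request to discard the local data:
 *   convert_after_sb(ASB_DISCARD_REMOTE) == ASB_DISCARD_LOCAL
 * so a peer configured with discard-remote matches a local
 * discard-local setting, and every other value must simply be equal. */
if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p)
        conn_err(tconn, "incompatible %s settings\n", "after-sb-0pri");
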
2709 3109
2710static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 3110static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
2711{ 3111{
2712 struct p_protocol *p = &mdev->data.rbuf.protocol; 3112 struct p_protocol *p = pi->data;
2713 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; 3113 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
2714 int p_want_lose, p_two_primaries, cf; 3114 int p_proto, p_discard_my_data, p_two_primaries, cf;
2715 char p_integrity_alg[SHARED_SECRET_MAX] = ""; 3115 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3116 char integrity_alg[SHARED_SECRET_MAX] = "";
3117 struct crypto_hash *peer_integrity_tfm = NULL;
3118 void *int_dig_in = NULL, *int_dig_vv = NULL;
2716 3119
2717 p_proto = be32_to_cpu(p->protocol); 3120 p_proto = be32_to_cpu(p->protocol);
2718 p_after_sb_0p = be32_to_cpu(p->after_sb_0p); 3121 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
@@ -2720,63 +3123,138 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsig
2720 p_after_sb_2p = be32_to_cpu(p->after_sb_2p); 3123 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
2721 p_two_primaries = be32_to_cpu(p->two_primaries); 3124 p_two_primaries = be32_to_cpu(p->two_primaries);
2722 cf = be32_to_cpu(p->conn_flags); 3125 cf = be32_to_cpu(p->conn_flags);
2723 p_want_lose = cf & CF_WANT_LOSE; 3126 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
2724
2725 clear_bit(CONN_DRY_RUN, &mdev->flags);
2726 3127
2727 if (cf & CF_DRY_RUN) 3128 if (tconn->agreed_pro_version >= 87) {
2728 set_bit(CONN_DRY_RUN, &mdev->flags); 3129 int err;
2729 3130
2730 if (p_proto != mdev->net_conf->wire_protocol) { 3131 if (pi->size > sizeof(integrity_alg))
2731 dev_err(DEV, "incompatible communication protocols\n"); 3132 return -EIO;
2732 goto disconnect; 3133 err = drbd_recv_all(tconn, integrity_alg, pi->size);
3134 if (err)
3135 return err;
3136 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
2733 } 3137 }
2734 3138
2735 if (cmp_after_sb(p_after_sb_0p, mdev->net_conf->after_sb_0p)) { 3139 if (pi->cmd != P_PROTOCOL_UPDATE) {
2736 dev_err(DEV, "incompatible after-sb-0pri settings\n"); 3140 clear_bit(CONN_DRY_RUN, &tconn->flags);
2737 goto disconnect;
2738 }
2739 3141
2740 if (cmp_after_sb(p_after_sb_1p, mdev->net_conf->after_sb_1p)) { 3142 if (cf & CF_DRY_RUN)
2741 dev_err(DEV, "incompatible after-sb-1pri settings\n"); 3143 set_bit(CONN_DRY_RUN, &tconn->flags);
2742 goto disconnect;
2743 }
2744 3144
2745 if (cmp_after_sb(p_after_sb_2p, mdev->net_conf->after_sb_2p)) { 3145 rcu_read_lock();
2746 dev_err(DEV, "incompatible after-sb-2pri settings\n"); 3146 nc = rcu_dereference(tconn->net_conf);
2747 goto disconnect;
2748 }
2749 3147
2750 if (p_want_lose && mdev->net_conf->want_lose) { 3148 if (p_proto != nc->wire_protocol) {
2751 dev_err(DEV, "both sides have the 'want_lose' flag set\n"); 3149 conn_err(tconn, "incompatible %s settings\n", "protocol");
2752 goto disconnect; 3150 goto disconnect_rcu_unlock;
2753 } 3151 }
2754 3152
2755 if (p_two_primaries != mdev->net_conf->two_primaries) { 3153 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
2756 dev_err(DEV, "incompatible setting of the two-primaries options\n"); 3154 conn_err(tconn, "incompatible %s settings\n", "after-sb-0pri");
2757 goto disconnect; 3155 goto disconnect_rcu_unlock;
3156 }
3157
3158 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
3159 conn_err(tconn, "incompatible %s settings\n", "after-sb-1pri");
3160 goto disconnect_rcu_unlock;
3161 }
3162
3163 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
3164 conn_err(tconn, "incompatible %s settings\n", "after-sb-2pri");
3165 goto disconnect_rcu_unlock;
3166 }
3167
3168 if (p_discard_my_data && nc->discard_my_data) {
3169 conn_err(tconn, "incompatible %s settings\n", "discard-my-data");
3170 goto disconnect_rcu_unlock;
3171 }
3172
3173 if (p_two_primaries != nc->two_primaries) {
3174 conn_err(tconn, "incompatible %s settings\n", "allow-two-primaries");
3175 goto disconnect_rcu_unlock;
3176 }
3177
3178 if (strcmp(integrity_alg, nc->integrity_alg)) {
3179 conn_err(tconn, "incompatible %s settings\n", "data-integrity-alg");
3180 goto disconnect_rcu_unlock;
3181 }
3182
3183 rcu_read_unlock();
2758 } 3184 }
2759 3185
2760 if (mdev->agreed_pro_version >= 87) { 3186 if (integrity_alg[0]) {
2761 unsigned char *my_alg = mdev->net_conf->integrity_alg; 3187 int hash_size;
2762 3188
2763 if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size) 3189 /*
2764 return false; 3190 * We can only change the peer data integrity algorithm
3191 * here. Changing our own data integrity algorithm
3192 * requires that we send a P_PROTOCOL_UPDATE packet at
3193 * the same time; otherwise, the peer has no way to
3194 * tell between which packets the algorithm should
3195 * change.
3196 */
2765 3197
2766 p_integrity_alg[SHARED_SECRET_MAX-1] = 0; 3198 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
2767 if (strcmp(p_integrity_alg, my_alg)) { 3199 if (!peer_integrity_tfm) {
2768 dev_err(DEV, "incompatible setting of the data-integrity-alg\n"); 3200 conn_err(tconn, "peer data-integrity-alg %s not supported\n",
3201 integrity_alg);
2769 goto disconnect; 3202 goto disconnect;
2770 } 3203 }
2771 dev_info(DEV, "data-integrity-alg: %s\n", 3204
2772 my_alg[0] ? my_alg : (unsigned char *)"<not-used>"); 3205 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3206 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3207 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3208 if (!(int_dig_in && int_dig_vv)) {
3209 conn_err(tconn, "Allocation of buffers for data integrity checking failed\n");
3210 goto disconnect;
3211 }
3212 }
3213
3214 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3215 if (!new_net_conf) {
3216 conn_err(tconn, "Allocation of new net_conf failed\n");
3217 goto disconnect;
2773 } 3218 }
2774 3219
2775 return true; 3220 mutex_lock(&tconn->data.mutex);
3221 mutex_lock(&tconn->conf_update);
3222 old_net_conf = tconn->net_conf;
3223 *new_net_conf = *old_net_conf;
3224
3225 new_net_conf->wire_protocol = p_proto;
3226 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3227 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3228 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3229 new_net_conf->two_primaries = p_two_primaries;
2776 3230
3231 rcu_assign_pointer(tconn->net_conf, new_net_conf);
3232 mutex_unlock(&tconn->conf_update);
3233 mutex_unlock(&tconn->data.mutex);
3234
3235 crypto_free_hash(tconn->peer_integrity_tfm);
3236 kfree(tconn->int_dig_in);
3237 kfree(tconn->int_dig_vv);
3238 tconn->peer_integrity_tfm = peer_integrity_tfm;
3239 tconn->int_dig_in = int_dig_in;
3240 tconn->int_dig_vv = int_dig_vv;
3241
3242 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
3243 conn_info(tconn, "peer data-integrity-alg: %s\n",
3244 integrity_alg[0] ? integrity_alg : "(none)");
3245
3246 synchronize_rcu();
3247 kfree(old_net_conf);
3248 return 0;
3249
3250disconnect_rcu_unlock:
3251 rcu_read_unlock();
2777disconnect: 3252disconnect:
2778 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 3253 crypto_free_hash(peer_integrity_tfm);
2779 return false; 3254 kfree(int_dig_in);
3255 kfree(int_dig_vv);
3256 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
3257 return -EIO;
2780} 3258}
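
The rewritten receive_protocol() applies the peer's settings with the usual RCU publish sequence: allocate a copy of net_conf, modify the copy, rcu_assign_pointer() it into place while conf_update is held, and free the old structure only after synchronize_rcu() guarantees that no reader still holds it. A condensed sketch of that sequence (error handling and the integrity-alg parts omitted):

new_net_conf = kmalloc(sizeof(*new_net_conf), GFP_KERNEL);
if (!new_net_conf)
        goto disconnect;

mutex_lock(&tconn->conf_update);
old_net_conf = tconn->net_conf;
*new_net_conf = *old_net_conf;               /* start from the current settings */
new_net_conf->wire_protocol = p_proto;       /* apply the peer's values */
rcu_assign_pointer(tconn->net_conf, new_net_conf);
mutex_unlock(&tconn->conf_update);

synchronize_rcu();                           /* wait out existing RCU readers */
kfree(old_net_conf);                         /* now safe to free the old copy */
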
2781 3259
2782/* helper function 3260/* helper function
@@ -2798,24 +3276,64 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2798 alg, name, PTR_ERR(tfm)); 3276 alg, name, PTR_ERR(tfm));
2799 return tfm; 3277 return tfm;
2800 } 3278 }
2801 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2802 crypto_free_hash(tfm);
2803 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2804 return ERR_PTR(-EINVAL);
2805 }
2806 return tfm; 3279 return tfm;
2807} 3280}
2808 3281
2809static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size) 3282static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info *pi)
3283{
3284 void *buffer = tconn->data.rbuf;
3285 int size = pi->size;
3286
3287 while (size) {
3288 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3289 s = drbd_recv(tconn, buffer, s);
3290 if (s <= 0) {
3291 if (s < 0)
3292 return s;
3293 break;
3294 }
3295 size -= s;
3296 }
3297 if (size)
3298 return -EIO;
3299 return 0;
3300}
3301
3302/*
3303 * config_unknown_volume - device configuration command for unknown volume
3304 *
3305 * When a device is added to an existing connection, the node on which the
3306 * device is added first will send configuration commands to its peer but the
3307 * peer will not know about the device yet. It will warn and ignore these
3308 * commands. Once the device is added on the second node, the second node will
3309 * send the same device configuration commands, but in the other direction.
3310 *
3311 * (We can also end up here if drbd is misconfigured.)
3312 */
3313static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi)
2810{ 3314{
2811 int ok = true; 3315 conn_warn(tconn, "%s packet received for volume %u, which is not configured locally\n",
2812 struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95; 3316 cmdname(pi->cmd), pi->vnr);
3317 return ignore_remaining_packet(tconn, pi);
3318}
3319
3320static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
3321{
3322 struct drbd_conf *mdev;
3323 struct p_rs_param_95 *p;
2813 unsigned int header_size, data_size, exp_max_sz; 3324 unsigned int header_size, data_size, exp_max_sz;
2814 struct crypto_hash *verify_tfm = NULL; 3325 struct crypto_hash *verify_tfm = NULL;
2815 struct crypto_hash *csums_tfm = NULL; 3326 struct crypto_hash *csums_tfm = NULL;
2816 const int apv = mdev->agreed_pro_version; 3327 struct net_conf *old_net_conf, *new_net_conf = NULL;
2817 int *rs_plan_s = NULL; 3328 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
3329 const int apv = tconn->agreed_pro_version;
3330 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
2818 int fifo_size = 0; 3331 int fifo_size = 0;
3332 int err;
3333
3334 mdev = vnr_to_mdev(tconn, pi->vnr);
3335 if (!mdev)
3336 return config_unknown_volume(tconn, pi);
2819 3337
2820 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) 3338 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
2821 : apv == 88 ? sizeof(struct p_rs_param) 3339 : apv == 88 ? sizeof(struct p_rs_param)
@@ -2823,32 +3341,49 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
2823 : apv <= 94 ? sizeof(struct p_rs_param_89) 3341 : apv <= 94 ? sizeof(struct p_rs_param_89)
2824 : /* apv >= 95 */ sizeof(struct p_rs_param_95); 3342 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
2825 3343
2826 if (packet_size > exp_max_sz) { 3344 if (pi->size > exp_max_sz) {
2827 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", 3345 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
2828 packet_size, exp_max_sz); 3346 pi->size, exp_max_sz);
2829 return false; 3347 return -EIO;
2830 } 3348 }
2831 3349
2832 if (apv <= 88) { 3350 if (apv <= 88) {
2833 header_size = sizeof(struct p_rs_param) - sizeof(struct p_header80); 3351 header_size = sizeof(struct p_rs_param);
2834 data_size = packet_size - header_size; 3352 data_size = pi->size - header_size;
2835 } else if (apv <= 94) { 3353 } else if (apv <= 94) {
2836 header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header80); 3354 header_size = sizeof(struct p_rs_param_89);
2837 data_size = packet_size - header_size; 3355 data_size = pi->size - header_size;
2838 D_ASSERT(data_size == 0); 3356 D_ASSERT(data_size == 0);
2839 } else { 3357 } else {
2840 header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header80); 3358 header_size = sizeof(struct p_rs_param_95);
2841 data_size = packet_size - header_size; 3359 data_size = pi->size - header_size;
2842 D_ASSERT(data_size == 0); 3360 D_ASSERT(data_size == 0);
2843 } 3361 }
2844 3362
2845 /* initialize verify_alg and csums_alg */ 3363 /* initialize verify_alg and csums_alg */
3364 p = pi->data;
2846 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); 3365 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
2847 3366
2848 if (drbd_recv(mdev, &p->head.payload, header_size) != header_size) 3367 err = drbd_recv_all(mdev->tconn, p, header_size);
2849 return false; 3368 if (err)
3369 return err;
2850 3370
2851 mdev->sync_conf.rate = be32_to_cpu(p->rate); 3371 mutex_lock(&mdev->tconn->conf_update);
3372 old_net_conf = mdev->tconn->net_conf;
3373 if (get_ldev(mdev)) {
3374 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3375 if (!new_disk_conf) {
3376 put_ldev(mdev);
3377 mutex_unlock(&mdev->tconn->conf_update);
3378 dev_err(DEV, "Allocation of new disk_conf failed\n");
3379 return -ENOMEM;
3380 }
3381
3382 old_disk_conf = mdev->ldev->disk_conf;
3383 *new_disk_conf = *old_disk_conf;
3384
3385 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
3386 }
2852 3387
2853 if (apv >= 88) { 3388 if (apv >= 88) {
2854 if (apv == 88) { 3389 if (apv == 88) {
@@ -2856,12 +3391,13 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
2856 dev_err(DEV, "verify-alg of wrong size, " 3391 dev_err(DEV, "verify-alg of wrong size, "
2857 "peer wants %u, accepting only up to %u byte\n", 3392 "peer wants %u, accepting only up to %u byte\n",
2858 data_size, SHARED_SECRET_MAX); 3393 data_size, SHARED_SECRET_MAX);
2859 return false; 3394 err = -EIO;
3395 goto reconnect;
2860 } 3396 }
2861 3397
2862 if (drbd_recv(mdev, p->verify_alg, data_size) != data_size) 3398 err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size);
2863 return false; 3399 if (err)
2864 3400 goto reconnect;
2865 /* we expect NUL terminated string */ 3401 /* we expect NUL terminated string */
2866 /* but just in case someone tries to be evil */ 3402 /* but just in case someone tries to be evil */
2867 D_ASSERT(p->verify_alg[data_size-1] == 0); 3403 D_ASSERT(p->verify_alg[data_size-1] == 0);
@@ -2876,10 +3412,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
2876 p->csums_alg[SHARED_SECRET_MAX-1] = 0; 3412 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
2877 } 3413 }
2878 3414
2879 if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) { 3415 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
2880 if (mdev->state.conn == C_WF_REPORT_PARAMS) { 3416 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2881 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", 3417 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
2882 mdev->sync_conf.verify_alg, p->verify_alg); 3418 old_net_conf->verify_alg, p->verify_alg);
2883 goto disconnect; 3419 goto disconnect;
2884 } 3420 }
2885 verify_tfm = drbd_crypto_alloc_digest_safe(mdev, 3421 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
@@ -2890,10 +3426,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
2890 } 3426 }
2891 } 3427 }
2892 3428
2893 if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) { 3429 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
2894 if (mdev->state.conn == C_WF_REPORT_PARAMS) { 3430 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2895 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n", 3431 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
2896 mdev->sync_conf.csums_alg, p->csums_alg); 3432 old_net_conf->csums_alg, p->csums_alg);
2897 goto disconnect; 3433 goto disconnect;
2898 } 3434 }
2899 csums_tfm = drbd_crypto_alloc_digest_safe(mdev, 3435 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
@@ -2904,57 +3440,91 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
2904 } 3440 }
2905 } 3441 }
2906 3442
2907 if (apv > 94) { 3443 if (apv > 94 && new_disk_conf) {
2908 mdev->sync_conf.rate = be32_to_cpu(p->rate); 3444 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
2909 mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead); 3445 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
2910 mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target); 3446 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
2911 mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target); 3447 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
2912 mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate); 3448
2913 3449 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
2914 fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; 3450 if (fifo_size != mdev->rs_plan_s->size) {
2915 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { 3451 new_plan = fifo_alloc(fifo_size);
2916 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); 3452 if (!new_plan) {
2917 if (!rs_plan_s) {
2918 dev_err(DEV, "kmalloc of fifo_buffer failed"); 3453 dev_err(DEV, "kmalloc of fifo_buffer failed");
3454 put_ldev(mdev);
2919 goto disconnect; 3455 goto disconnect;
2920 } 3456 }
2921 } 3457 }
2922 } 3458 }
2923 3459
2924 spin_lock(&mdev->peer_seq_lock); 3460 if (verify_tfm || csums_tfm) {
2925 /* lock against drbd_nl_syncer_conf() */ 3461 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
2926 if (verify_tfm) { 3462 if (!new_net_conf) {
2927 strcpy(mdev->sync_conf.verify_alg, p->verify_alg); 3463 dev_err(DEV, "Allocation of new net_conf failed\n");
2928 mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1; 3464 goto disconnect;
2929 crypto_free_hash(mdev->verify_tfm); 3465 }
2930 mdev->verify_tfm = verify_tfm; 3466
2931 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg); 3467 *new_net_conf = *old_net_conf;
2932 } 3468
2933 if (csums_tfm) { 3469 if (verify_tfm) {
2934 strcpy(mdev->sync_conf.csums_alg, p->csums_alg); 3470 strcpy(new_net_conf->verify_alg, p->verify_alg);
2935 mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1; 3471 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
2936 crypto_free_hash(mdev->csums_tfm); 3472 crypto_free_hash(mdev->tconn->verify_tfm);
2937 mdev->csums_tfm = csums_tfm; 3473 mdev->tconn->verify_tfm = verify_tfm;
2938 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); 3474 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
2939 } 3475 }
2940 if (fifo_size != mdev->rs_plan_s.size) { 3476 if (csums_tfm) {
2941 kfree(mdev->rs_plan_s.values); 3477 strcpy(new_net_conf->csums_alg, p->csums_alg);
2942 mdev->rs_plan_s.values = rs_plan_s; 3478 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
2943 mdev->rs_plan_s.size = fifo_size; 3479 crypto_free_hash(mdev->tconn->csums_tfm);
2944 mdev->rs_planed = 0; 3480 mdev->tconn->csums_tfm = csums_tfm;
3481 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
3482 }
3483 rcu_assign_pointer(tconn->net_conf, new_net_conf);
2945 } 3484 }
2946 spin_unlock(&mdev->peer_seq_lock);
2947 } 3485 }
2948 3486
2949 return ok; 3487 if (new_disk_conf) {
3488 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3489 put_ldev(mdev);
3490 }
3491
3492 if (new_plan) {
3493 old_plan = mdev->rs_plan_s;
3494 rcu_assign_pointer(mdev->rs_plan_s, new_plan);
3495 }
3496
3497 mutex_unlock(&mdev->tconn->conf_update);
3498 synchronize_rcu();
3499 if (new_net_conf)
3500 kfree(old_net_conf);
3501 kfree(old_disk_conf);
3502 kfree(old_plan);
3503
3504 return 0;
3505
3506reconnect:
3507 if (new_disk_conf) {
3508 put_ldev(mdev);
3509 kfree(new_disk_conf);
3510 }
3511 mutex_unlock(&mdev->tconn->conf_update);
3512 return -EIO;
3513
2950disconnect: 3514disconnect:
3515 kfree(new_plan);
3516 if (new_disk_conf) {
3517 put_ldev(mdev);
3518 kfree(new_disk_conf);
3519 }
3520 mutex_unlock(&mdev->tconn->conf_update);
2951 /* just for completeness: actually not needed, 3521 /* just for completeness: actually not needed,
2952 * as this is not reached if csums_tfm was ok. */ 3522 * as this is not reached if csums_tfm was ok. */
2953 crypto_free_hash(csums_tfm); 3523 crypto_free_hash(csums_tfm);
2954 /* but free the verify_tfm again, if csums_tfm did not work out */ 3524 /* but free the verify_tfm again, if csums_tfm did not work out */
2955 crypto_free_hash(verify_tfm); 3525 crypto_free_hash(verify_tfm);
2956 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 3526 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
2957 return false; 3527 return -EIO;
2958} 3528}
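
Note how receive_SyncParam() batches its updates: net_conf, the disk_conf copy and the resync-plan FIFO are all published with rcu_assign_pointer() while conf_update is held, and a single synchronize_rcu() afterwards covers all of them before the old copies are freed. In outline (a sketch, not the full error handling):

rcu_assign_pointer(tconn->net_conf, new_net_conf);
rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
rcu_assign_pointer(mdev->rs_plan_s, new_plan);
mutex_unlock(&mdev->tconn->conf_update);

synchronize_rcu();            /* one grace period is enough for all three */
kfree(old_net_conf);
kfree(old_disk_conf);
kfree(old_plan);
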
2959 3529
2960/* warn if the arguments differ by more than 12.5% */ 3530/* warn if the arguments differ by more than 12.5% */
@@ -2970,59 +3540,77 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev,
2970 (unsigned long long)a, (unsigned long long)b); 3540 (unsigned long long)a, (unsigned long long)b);
2971} 3541}
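
"Warn if the arguments differ by more than 12.5%" means the warning fires once the absolute difference exceeds one eighth of either value. Only the tail of the function is visible in this hunk; a sketch of the check it performs, assuming sector_t arguments a and b as in the driver:

sector_t d = (a > b) ? a - b : b - a;

if (d > (a >> 3) || d > (b >> 3))            /* more than 12.5% of either value */
        dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n",
                 s, (unsigned long long)a, (unsigned long long)b);
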
2972 3542
2973static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 3543static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
2974{ 3544{
2975 struct p_sizes *p = &mdev->data.rbuf.sizes; 3545 struct drbd_conf *mdev;
3546 struct p_sizes *p = pi->data;
2976 enum determine_dev_size dd = unchanged; 3547 enum determine_dev_size dd = unchanged;
2977 sector_t p_size, p_usize, my_usize; 3548 sector_t p_size, p_usize, my_usize;
2978 int ldsc = 0; /* local disk size changed */ 3549 int ldsc = 0; /* local disk size changed */
2979 enum dds_flags ddsf; 3550 enum dds_flags ddsf;
2980 3551
3552 mdev = vnr_to_mdev(tconn, pi->vnr);
3553 if (!mdev)
3554 return config_unknown_volume(tconn, pi);
3555
2981 p_size = be64_to_cpu(p->d_size); 3556 p_size = be64_to_cpu(p->d_size);
2982 p_usize = be64_to_cpu(p->u_size); 3557 p_usize = be64_to_cpu(p->u_size);
2983 3558
2984 if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
2985 dev_err(DEV, "some backing storage is needed\n");
2986 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
2987 return false;
2988 }
2989
2990 /* just store the peer's disk size for now. 3559 /* just store the peer's disk size for now.
2991 * we still need to figure out whether we accept that. */ 3560 * we still need to figure out whether we accept that. */
2992 mdev->p_size = p_size; 3561 mdev->p_size = p_size;
2993 3562
2994 if (get_ldev(mdev)) { 3563 if (get_ldev(mdev)) {
3564 rcu_read_lock();
3565 my_usize = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
3566 rcu_read_unlock();
3567
2995 warn_if_differ_considerably(mdev, "lower level device sizes", 3568 warn_if_differ_considerably(mdev, "lower level device sizes",
2996 p_size, drbd_get_max_capacity(mdev->ldev)); 3569 p_size, drbd_get_max_capacity(mdev->ldev));
2997 warn_if_differ_considerably(mdev, "user requested size", 3570 warn_if_differ_considerably(mdev, "user requested size",
2998 p_usize, mdev->ldev->dc.disk_size); 3571 p_usize, my_usize);
2999 3572
3000 /* if this is the first connect, or an otherwise expected 3573 /* if this is the first connect, or an otherwise expected
3001 * param exchange, choose the minimum */ 3574 * param exchange, choose the minimum */
3002 if (mdev->state.conn == C_WF_REPORT_PARAMS) 3575 if (mdev->state.conn == C_WF_REPORT_PARAMS)
3003 p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size, 3576 p_usize = min_not_zero(my_usize, p_usize);
3004 p_usize);
3005
3006 my_usize = mdev->ldev->dc.disk_size;
3007
3008 if (mdev->ldev->dc.disk_size != p_usize) {
3009 mdev->ldev->dc.disk_size = p_usize;
3010 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3011 (unsigned long)mdev->ldev->dc.disk_size);
3012 }
3013 3577
3014 /* Never shrink a device with usable data during connect. 3578 /* Never shrink a device with usable data during connect.
3015 But allow online shrinking if we are connected. */ 3579 But allow online shrinking if we are connected. */
3016 if (drbd_new_dev_size(mdev, mdev->ldev, 0) < 3580 if (drbd_new_dev_size(mdev, mdev->ldev, p_usize, 0) <
3017 drbd_get_capacity(mdev->this_bdev) && 3581 drbd_get_capacity(mdev->this_bdev) &&
3018 mdev->state.disk >= D_OUTDATED && 3582 mdev->state.disk >= D_OUTDATED &&
3019 mdev->state.conn < C_CONNECTED) { 3583 mdev->state.conn < C_CONNECTED) {
3020 dev_err(DEV, "The peer's disk size is too small!\n"); 3584 dev_err(DEV, "The peer's disk size is too small!\n");
3021 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 3585 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
3022 mdev->ldev->dc.disk_size = my_usize;
3023 put_ldev(mdev); 3586 put_ldev(mdev);
3024 return false; 3587 return -EIO;
3588 }
3589
3590 if (my_usize != p_usize) {
3591 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3592
3593 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3594 if (!new_disk_conf) {
3595 dev_err(DEV, "Allocation of new disk_conf failed\n");
3596 put_ldev(mdev);
3597 return -ENOMEM;
3598 }
3599
3600 mutex_lock(&mdev->tconn->conf_update);
3601 old_disk_conf = mdev->ldev->disk_conf;
3602 *new_disk_conf = *old_disk_conf;
3603 new_disk_conf->disk_size = p_usize;
3604
3605 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3606 mutex_unlock(&mdev->tconn->conf_update);
3607 synchronize_rcu();
3608 kfree(old_disk_conf);
3609
3610 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3611 (unsigned long)my_usize);
3025 } 3612 }
3613
3026 put_ldev(mdev); 3614 put_ldev(mdev);
3027 } 3615 }
3028 3616
@@ -3031,7 +3619,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3031 dd = drbd_determine_dev_size(mdev, ddsf); 3619 dd = drbd_determine_dev_size(mdev, ddsf);
3032 put_ldev(mdev); 3620 put_ldev(mdev);
3033 if (dd == dev_size_error) 3621 if (dd == dev_size_error)
3034 return false; 3622 return -EIO;
3035 drbd_md_sync(mdev); 3623 drbd_md_sync(mdev);
3036 } else { 3624 } else {
3037 /* I am diskless, need to accept the peer's size. */ 3625 /* I am diskless, need to accept the peer's size. */
@@ -3070,16 +3658,25 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3070 } 3658 }
3071 } 3659 }
3072 3660
3073 return true; 3661 return 0;
3074} 3662}
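
During the first parameter exchange (C_WF_REPORT_PARAMS), receive_sizes() picks the requested user size with min_not_zero(), which selects the smaller of the two values but treats 0 as "not configured" rather than "smallest". A small illustration with hypothetical values:

sector_t my_usize = 0;          /* no explicit size configured locally     */
sector_t p_usize  = 1048576;    /* peer requests 512 MiB worth of sectors  */
sector_t agreed   = min_not_zero(my_usize, p_usize);   /* -> 1048576       */
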
3075 3663
3076static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 3664static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi)
3077{ 3665{
3078 struct p_uuids *p = &mdev->data.rbuf.uuids; 3666 struct drbd_conf *mdev;
3667 struct p_uuids *p = pi->data;
3079 u64 *p_uuid; 3668 u64 *p_uuid;
3080 int i, updated_uuids = 0; 3669 int i, updated_uuids = 0;
3081 3670
3671 mdev = vnr_to_mdev(tconn, pi->vnr);
3672 if (!mdev)
3673 return config_unknown_volume(tconn, pi);
3674
3082 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); 3675 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3676 if (!p_uuid) {
3677 dev_err(DEV, "kmalloc of p_uuid failed\n");
3678 return false;
3679 }
3083 3680
3084 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) 3681 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3085 p_uuid[i] = be64_to_cpu(p->uuid[i]); 3682 p_uuid[i] = be64_to_cpu(p->uuid[i]);
@@ -3093,14 +3690,14 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3093 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { 3690 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3094 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n", 3691 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3095 (unsigned long long)mdev->ed_uuid); 3692 (unsigned long long)mdev->ed_uuid);
3096 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 3693 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
3097 return false; 3694 return -EIO;
3098 } 3695 }
3099 3696
3100 if (get_ldev(mdev)) { 3697 if (get_ldev(mdev)) {
3101 int skip_initial_sync = 3698 int skip_initial_sync =
3102 mdev->state.conn == C_CONNECTED && 3699 mdev->state.conn == C_CONNECTED &&
3103 mdev->agreed_pro_version >= 90 && 3700 mdev->tconn->agreed_pro_version >= 90 &&
3104 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && 3701 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3105 (p_uuid[UI_FLAGS] & 8); 3702 (p_uuid[UI_FLAGS] & 8);
3106 if (skip_initial_sync) { 3703 if (skip_initial_sync) {
@@ -3127,14 +3724,15 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3127 ongoing cluster wide state change is finished. That is important if 3724 ongoing cluster wide state change is finished. That is important if
3128 we are primary and are detaching from our disk. We need to see the 3725 we are primary and are detaching from our disk. We need to see the
3129 new disk state... */ 3726 new disk state... */
3130 wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags)); 3727 mutex_lock(mdev->state_mutex);
3728 mutex_unlock(mdev->state_mutex);
3131 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) 3729 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
3132 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); 3730 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3133 3731
3134 if (updated_uuids) 3732 if (updated_uuids)
3135 drbd_print_uuids(mdev, "receiver updated UUIDs to"); 3733 drbd_print_uuids(mdev, "receiver updated UUIDs to");
3136 3734
3137 return true; 3735 return 0;
3138} 3736}
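
The old wait_event() on CLUSTER_ST_CHANGE is replaced by taking and immediately releasing mdev->state_mutex: the lock/unlock pair acts purely as a barrier that blocks until an ongoing cluster-wide state change, which holds that mutex, has finished. Schematically:

mutex_lock(mdev->state_mutex);    /* blocks while a state change is in flight       */
mutex_unlock(mdev->state_mutex);  /* nothing to protect; we only needed to wait     */
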
3139 3737
3140/** 3738/**
@@ -3146,6 +3744,7 @@ static union drbd_state convert_state(union drbd_state ps)
3146 union drbd_state ms; 3744 union drbd_state ms;
3147 3745
3148 static enum drbd_conns c_tab[] = { 3746 static enum drbd_conns c_tab[] = {
3747 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
3149 [C_CONNECTED] = C_CONNECTED, 3748 [C_CONNECTED] = C_CONNECTED,
3150 3749
3151 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T, 3750 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
@@ -3167,40 +3766,74 @@ static union drbd_state convert_state(union drbd_state ps)
3167 return ms; 3766 return ms;
3168} 3767}
3169 3768
3170static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 3769static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi)
3171{ 3770{
3172 struct p_req_state *p = &mdev->data.rbuf.req_state; 3771 struct drbd_conf *mdev;
3772 struct p_req_state *p = pi->data;
3173 union drbd_state mask, val; 3773 union drbd_state mask, val;
3174 enum drbd_state_rv rv; 3774 enum drbd_state_rv rv;
3175 3775
3776 mdev = vnr_to_mdev(tconn, pi->vnr);
3777 if (!mdev)
3778 return -EIO;
3779
3176 mask.i = be32_to_cpu(p->mask); 3780 mask.i = be32_to_cpu(p->mask);
3177 val.i = be32_to_cpu(p->val); 3781 val.i = be32_to_cpu(p->val);
3178 3782
3179 if (test_bit(DISCARD_CONCURRENT, &mdev->flags) && 3783 if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags) &&
3180 test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { 3784 mutex_is_locked(mdev->state_mutex)) {
3181 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); 3785 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
3182 return true; 3786 return 0;
3183 } 3787 }
3184 3788
3185 mask = convert_state(mask); 3789 mask = convert_state(mask);
3186 val = convert_state(val); 3790 val = convert_state(val);
3187 3791
3188 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val); 3792 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3189
3190 drbd_send_sr_reply(mdev, rv); 3793 drbd_send_sr_reply(mdev, rv);
3794
3191 drbd_md_sync(mdev); 3795 drbd_md_sync(mdev);
3192 3796
3193 return true; 3797 return 0;
3194} 3798}
3195 3799
3196static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 3800static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi)
3197{ 3801{
3198 struct p_state *p = &mdev->data.rbuf.state; 3802 struct p_req_state *p = pi->data;
3803 union drbd_state mask, val;
3804 enum drbd_state_rv rv;
3805
3806 mask.i = be32_to_cpu(p->mask);
3807 val.i = be32_to_cpu(p->val);
3808
3809 if (test_bit(RESOLVE_CONFLICTS, &tconn->flags) &&
3810 mutex_is_locked(&tconn->cstate_mutex)) {
3811 conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
3812 return 0;
3813 }
3814
3815 mask = convert_state(mask);
3816 val = convert_state(val);
3817
3818 rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
3819 conn_send_sr_reply(tconn, rv);
3820
3821 return 0;
3822}
3823
3824static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
3825{
3826 struct drbd_conf *mdev;
3827 struct p_state *p = pi->data;
3199 union drbd_state os, ns, peer_state; 3828 union drbd_state os, ns, peer_state;
3200 enum drbd_disk_state real_peer_disk; 3829 enum drbd_disk_state real_peer_disk;
3201 enum chg_state_flags cs_flags; 3830 enum chg_state_flags cs_flags;
3202 int rv; 3831 int rv;
3203 3832
3833 mdev = vnr_to_mdev(tconn, pi->vnr);
3834 if (!mdev)
3835 return config_unknown_volume(tconn, pi);
3836
3204 peer_state.i = be32_to_cpu(p->state); 3837 peer_state.i = be32_to_cpu(p->state);
3205 3838
3206 real_peer_disk = peer_state.disk; 3839 real_peer_disk = peer_state.disk;
@@ -3209,16 +3842,16 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3209 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk)); 3842 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3210 } 3843 }
3211 3844
3212 spin_lock_irq(&mdev->req_lock); 3845 spin_lock_irq(&mdev->tconn->req_lock);
3213 retry: 3846 retry:
3214 os = ns = mdev->state; 3847 os = ns = drbd_read_state(mdev);
3215 spin_unlock_irq(&mdev->req_lock); 3848 spin_unlock_irq(&mdev->tconn->req_lock);
3216 3849
3217 /* If some other part of the code (asender thread, timeout) 3850 /* If some other part of the code (asender thread, timeout)
3218 * already decided to close the connection again, 3851 * already decided to close the connection again,
3219 * we must not "re-establish" it here. */ 3852 * we must not "re-establish" it here. */
3220 if (os.conn <= C_TEAR_DOWN) 3853 if (os.conn <= C_TEAR_DOWN)
3221 return false; 3854 return -ECONNRESET;
3222 3855
3223 /* If this is the "end of sync" confirmation, usually the peer disk 3856 /* If this is the "end of sync" confirmation, usually the peer disk
3224 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits 3857 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
@@ -3246,10 +3879,18 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3246 peer_state.conn == C_CONNECTED) { 3879 peer_state.conn == C_CONNECTED) {
3247 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) 3880 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3248 drbd_resync_finished(mdev); 3881 drbd_resync_finished(mdev);
3249 return true; 3882 return 0;
3250 } 3883 }
3251 } 3884 }
3252 3885
3886 /* explicit verify finished notification, stop sector reached. */
3887 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
3888 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
3889 ov_out_of_sync_print(mdev);
3890 drbd_resync_finished(mdev);
3891 return 0;
3892 }
3893
3253 /* peer says his disk is inconsistent, while we think it is uptodate, 3894 /* peer says his disk is inconsistent, while we think it is uptodate,
3254 * and this happens while the peer still thinks we have a sync going on, 3895 * and this happens while the peer still thinks we have a sync going on,
3255 * but we think we are already done with the sync. 3896 * but we think we are already done with the sync.
@@ -3298,17 +3939,17 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3298 peer_state.disk = D_DISKLESS; 3939 peer_state.disk = D_DISKLESS;
3299 real_peer_disk = D_DISKLESS; 3940 real_peer_disk = D_DISKLESS;
3300 } else { 3941 } else {
3301 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) 3942 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
3302 return false; 3943 return -EIO;
3303 D_ASSERT(os.conn == C_WF_REPORT_PARAMS); 3944 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
3304 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 3945 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
3305 return false; 3946 return -EIO;
3306 } 3947 }
3307 } 3948 }
3308 } 3949 }
3309 3950
3310 spin_lock_irq(&mdev->req_lock); 3951 spin_lock_irq(&mdev->tconn->req_lock);
3311 if (mdev->state.i != os.i) 3952 if (os.i != drbd_read_state(mdev).i)
3312 goto retry; 3953 goto retry;
3313 clear_bit(CONSIDER_RESYNC, &mdev->flags); 3954 clear_bit(CONSIDER_RESYNC, &mdev->flags);
3314 ns.peer = peer_state.role; 3955 ns.peer = peer_state.role;
@@ -3317,25 +3958,25 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3317 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) 3958 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
3318 ns.disk = mdev->new_state_tmp.disk; 3959 ns.disk = mdev->new_state_tmp.disk;
3319 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD); 3960 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3320 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED && 3961 if (ns.pdsk == D_CONSISTENT && drbd_suspended(mdev) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
3321 test_bit(NEW_CUR_UUID, &mdev->flags)) { 3962 test_bit(NEW_CUR_UUID, &mdev->flags)) {
3322 /* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this 3963 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
3323 for temporary network outages! */ 3964 for temporary network outages! */
3324 spin_unlock_irq(&mdev->req_lock); 3965 spin_unlock_irq(&mdev->tconn->req_lock);
3325 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n"); 3966 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
3326 tl_clear(mdev); 3967 tl_clear(mdev->tconn);
3327 drbd_uuid_new_current(mdev); 3968 drbd_uuid_new_current(mdev);
3328 clear_bit(NEW_CUR_UUID, &mdev->flags); 3969 clear_bit(NEW_CUR_UUID, &mdev->flags);
3329 drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0)); 3970 conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
3330 return false; 3971 return -EIO;
3331 } 3972 }
3332 rv = _drbd_set_state(mdev, ns, cs_flags, NULL); 3973 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
3333 ns = mdev->state; 3974 ns = drbd_read_state(mdev);
3334 spin_unlock_irq(&mdev->req_lock); 3975 spin_unlock_irq(&mdev->tconn->req_lock);
3335 3976
3336 if (rv < SS_SUCCESS) { 3977 if (rv < SS_SUCCESS) {
3337 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 3978 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
3338 return false; 3979 return -EIO;
3339 } 3980 }
3340 3981
3341 if (os.conn > C_WF_REPORT_PARAMS) { 3982 if (os.conn > C_WF_REPORT_PARAMS) {
@@ -3349,16 +3990,21 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3349 } 3990 }
3350 } 3991 }
3351 3992
3352 mdev->net_conf->want_lose = 0; 3993 clear_bit(DISCARD_MY_DATA, &mdev->flags);
3353 3994
3354 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */ 3995 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3355 3996
3356 return true; 3997 return 0;
3357} 3998}
3358 3999
3359static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 4000static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi)
3360{ 4001{
3361 struct p_rs_uuid *p = &mdev->data.rbuf.rs_uuid; 4002 struct drbd_conf *mdev;
4003 struct p_rs_uuid *p = pi->data;
4004
4005 mdev = vnr_to_mdev(tconn, pi->vnr);
4006 if (!mdev)
4007 return -EIO;
3362 4008
3363 wait_event(mdev->misc_wait, 4009 wait_event(mdev->misc_wait,
3364 mdev->state.conn == C_WF_SYNC_UUID || 4010 mdev->state.conn == C_WF_SYNC_UUID ||
@@ -3381,7 +4027,7 @@ static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
3381 } else 4027 } else
3382 dev_err(DEV, "Ignoring SyncUUID packet!\n"); 4028 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3383 4029
3384 return true; 4030 return 0;
3385} 4031}
3386 4032
3387/** 4033/**
@@ -3391,27 +4037,27 @@ static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
3391 * code upon failure. 4037 * code upon failure.
3392 */ 4038 */
3393static int 4039static int
3394receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, 4040receive_bitmap_plain(struct drbd_conf *mdev, unsigned int size,
3395 unsigned long *buffer, struct bm_xfer_ctx *c) 4041 unsigned long *p, struct bm_xfer_ctx *c)
3396{ 4042{
3397 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); 4043 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
3398 unsigned want = num_words * sizeof(long); 4044 drbd_header_size(mdev->tconn);
4045 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
4046 c->bm_words - c->word_offset);
4047 unsigned int want = num_words * sizeof(*p);
3399 int err; 4048 int err;
3400 4049
3401 if (want != data_size) { 4050 if (want != size) {
3402 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size); 4051 dev_err(DEV, "%s:want (%u) != size (%u)\n", __func__, want, size);
3403 return -EIO; 4052 return -EIO;
3404 } 4053 }
3405 if (want == 0) 4054 if (want == 0)
3406 return 0; 4055 return 0;
3407 err = drbd_recv(mdev, buffer, want); 4056 err = drbd_recv_all(mdev->tconn, p, want);
3408 if (err != want) { 4057 if (err)
3409 if (err >= 0)
3410 err = -EIO;
3411 return err; 4058 return err;
3412 }
3413 4059
3414 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer); 4060 drbd_bm_merge_lel(mdev, c->word_offset, num_words, p);
3415 4061
3416 c->word_offset += num_words; 4062 c->word_offset += num_words;
3417 c->bit_offset = c->word_offset * BITS_PER_LONG; 4063 c->bit_offset = c->word_offset * BITS_PER_LONG;
@@ -3421,6 +4067,21 @@ receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3421 return 1; 4067 return 1;
3422} 4068}
3423 4069
4070static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4071{
4072 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4073}
4074
4075static int dcbp_get_start(struct p_compressed_bm *p)
4076{
4077 return (p->encoding & 0x80) != 0;
4078}
4079
4080static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4081{
4082 return (p->encoding >> 4) & 0x7;
4083}
4084
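Editor's note: the dcbp_get_* helpers above unpack the single encoding byte of a compressed-bitmap packet: the low nibble carries the bitmap encoding code, bits 4-6 the number of pad bits, and bit 7 the start/toggle flag. A minimal userspace sketch of the same packing scheme follows; the enc_* names are illustrative only and not part of DRBD.

#include <assert.h>
#include <stdint.h>

/* encoding byte layout: code in bits 0-3, pad bits in bits 4-6, start flag in bit 7 */
static uint8_t enc_pack(unsigned code, unsigned pad_bits, int start)
{
	return (code & 0x0f) | ((pad_bits & 0x7) << 4) | (start ? 0x80 : 0);
}

static unsigned enc_code(uint8_t e)     { return e & 0x0f; }
static unsigned enc_pad_bits(uint8_t e) { return (e >> 4) & 0x7; }
static int      enc_start(uint8_t e)    { return (e & 0x80) != 0; }

int main(void)
{
	uint8_t e = enc_pack(2, 5, 1);	/* e.g. code 2, 5 pad bits, start bit set */

	assert(enc_code(e) == 2);
	assert(enc_pad_bits(e) == 5);
	assert(enc_start(e) == 1);
	return 0;
}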
3424/** 4085/**
3425 * recv_bm_rle_bits 4086 * recv_bm_rle_bits
3426 * 4087 *
@@ -3430,7 +4091,8 @@ receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3430static int 4091static int
3431recv_bm_rle_bits(struct drbd_conf *mdev, 4092recv_bm_rle_bits(struct drbd_conf *mdev,
3432 struct p_compressed_bm *p, 4093 struct p_compressed_bm *p,
3433 struct bm_xfer_ctx *c) 4094 struct bm_xfer_ctx *c,
4095 unsigned int len)
3434{ 4096{
3435 struct bitstream bs; 4097 struct bitstream bs;
3436 u64 look_ahead; 4098 u64 look_ahead;
@@ -3438,12 +4100,11 @@ recv_bm_rle_bits(struct drbd_conf *mdev,
3438 u64 tmp; 4100 u64 tmp;
3439 unsigned long s = c->bit_offset; 4101 unsigned long s = c->bit_offset;
3440 unsigned long e; 4102 unsigned long e;
3441 int len = be16_to_cpu(p->head.length) - (sizeof(*p) - sizeof(p->head)); 4103 int toggle = dcbp_get_start(p);
3442 int toggle = DCBP_get_start(p);
3443 int have; 4104 int have;
3444 int bits; 4105 int bits;
3445 4106
3446 bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p)); 4107 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
3447 4108
3448 bits = bitstream_get_bits(&bs, &look_ahead, 64); 4109 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3449 if (bits < 0) 4110 if (bits < 0)
@@ -3495,17 +4156,18 @@ recv_bm_rle_bits(struct drbd_conf *mdev,
3495static int 4156static int
3496decode_bitmap_c(struct drbd_conf *mdev, 4157decode_bitmap_c(struct drbd_conf *mdev,
3497 struct p_compressed_bm *p, 4158 struct p_compressed_bm *p,
3498 struct bm_xfer_ctx *c) 4159 struct bm_xfer_ctx *c,
4160 unsigned int len)
3499{ 4161{
3500 if (DCBP_get_code(p) == RLE_VLI_Bits) 4162 if (dcbp_get_code(p) == RLE_VLI_Bits)
3501 return recv_bm_rle_bits(mdev, p, c); 4163 return recv_bm_rle_bits(mdev, p, c, len - sizeof(*p));
3502 4164
3503 /* other variants had been implemented for evaluation, 4165 /* other variants had been implemented for evaluation,
3504 * but have been dropped as this one turned out to be "best" 4166 * but have been dropped as this one turned out to be "best"
3505 * during all our tests. */ 4167 * during all our tests. */
3506 4168
3507 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding); 4169 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
3508 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); 4170 conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
3509 return -EIO; 4171 return -EIO;
3510} 4172}
3511 4173
@@ -3513,11 +4175,13 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3513 const char *direction, struct bm_xfer_ctx *c) 4175 const char *direction, struct bm_xfer_ctx *c)
3514{ 4176{
3515 /* what would it take to transfer it "plaintext" */ 4177 /* what would it take to transfer it "plaintext" */
3516 unsigned plain = sizeof(struct p_header80) * 4178 unsigned int header_size = drbd_header_size(mdev->tconn);
3517 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1) 4179 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
3518 + c->bm_words * sizeof(long); 4180 unsigned int plain =
3519 unsigned total = c->bytes[0] + c->bytes[1]; 4181 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
3520 unsigned r; 4182 c->bm_words * sizeof(unsigned long);
4183 unsigned int total = c->bytes[0] + c->bytes[1];
4184 unsigned int r;
3521 4185
3522 /* total can not be zero. but just in case: */ 4186 /* total can not be zero. but just in case: */
3523 if (total == 0) 4187 if (total == 0)
@@ -3551,67 +4215,63 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3551 in order to be agnostic to the 32 vs 64 bits issue. 4215 in order to be agnostic to the 32 vs 64 bits issue.
3552 4216
3553 returns 0 on failure, 1 if we successfully received it. */ 4217 returns 0 on failure, 1 if we successfully received it. */
3554static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 4218static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi)
3555{ 4219{
4220 struct drbd_conf *mdev;
3556 struct bm_xfer_ctx c; 4221 struct bm_xfer_ctx c;
3557 void *buffer;
3558 int err; 4222 int err;
3559 int ok = false; 4223
3560 struct p_header80 *h = &mdev->data.rbuf.header.h80; 4224 mdev = vnr_to_mdev(tconn, pi->vnr);
4225 if (!mdev)
4226 return -EIO;
3561 4227
3562 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED); 4228 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3563 /* you are supposed to send additional out-of-sync information 4229 /* you are supposed to send additional out-of-sync information
3564 * if you actually set bits during this phase */ 4230 * if you actually set bits during this phase */
3565 4231
3566 /* maybe we should use some per thread scratch page,
3567 * and allocate that during initial device creation? */
3568 buffer = (unsigned long *) __get_free_page(GFP_NOIO);
3569 if (!buffer) {
3570 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
3571 goto out;
3572 }
3573
3574 c = (struct bm_xfer_ctx) { 4232 c = (struct bm_xfer_ctx) {
3575 .bm_bits = drbd_bm_bits(mdev), 4233 .bm_bits = drbd_bm_bits(mdev),
3576 .bm_words = drbd_bm_words(mdev), 4234 .bm_words = drbd_bm_words(mdev),
3577 }; 4235 };
3578 4236
3579 for(;;) { 4237 for(;;) {
3580 if (cmd == P_BITMAP) { 4238 if (pi->cmd == P_BITMAP)
3581 err = receive_bitmap_plain(mdev, data_size, buffer, &c); 4239 err = receive_bitmap_plain(mdev, pi->size, pi->data, &c);
3582 } else if (cmd == P_COMPRESSED_BITMAP) { 4240 else if (pi->cmd == P_COMPRESSED_BITMAP) {
3583 /* MAYBE: sanity check that we speak proto >= 90, 4241 /* MAYBE: sanity check that we speak proto >= 90,
3584 * and the feature is enabled! */ 4242 * and the feature is enabled! */
3585 struct p_compressed_bm *p; 4243 struct p_compressed_bm *p = pi->data;
3586 4244
3587 if (data_size > BM_PACKET_PAYLOAD_BYTES) { 4245 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(tconn)) {
3588 dev_err(DEV, "ReportCBitmap packet too large\n"); 4246 dev_err(DEV, "ReportCBitmap packet too large\n");
4247 err = -EIO;
3589 goto out; 4248 goto out;
3590 } 4249 }
3591 /* use the page buff */ 4250 if (pi->size <= sizeof(*p)) {
3592 p = buffer; 4251 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size);
3593 memcpy(p, h, sizeof(*h)); 4252 err = -EIO;
3594 if (drbd_recv(mdev, p->head.payload, data_size) != data_size)
3595 goto out;
3596 if (data_size <= (sizeof(*p) - sizeof(p->head))) {
3597 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
3598 goto out; 4253 goto out;
3599 } 4254 }
3600 err = decode_bitmap_c(mdev, p, &c); 4255 err = drbd_recv_all(mdev->tconn, p, pi->size);
4256 if (err)
4257 goto out;
4258 err = decode_bitmap_c(mdev, p, &c, pi->size);
3601 } else { 4259 } else {
3602 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd); 4260 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
4261 err = -EIO;
3603 goto out; 4262 goto out;
3604 } 4263 }
3605 4264
3606 c.packets[cmd == P_BITMAP]++; 4265 c.packets[pi->cmd == P_BITMAP]++;
3607 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size; 4266 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(tconn) + pi->size;
3608 4267
3609 if (err <= 0) { 4268 if (err <= 0) {
3610 if (err < 0) 4269 if (err < 0)
3611 goto out; 4270 goto out;
3612 break; 4271 break;
3613 } 4272 }
3614 if (!drbd_recv_header(mdev, &cmd, &data_size)) 4273 err = drbd_recv_header(mdev->tconn, pi);
4274 if (err)
3615 goto out; 4275 goto out;
3616 } 4276 }
3617 4277
@@ -3620,8 +4280,8 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne
3620 if (mdev->state.conn == C_WF_BITMAP_T) { 4280 if (mdev->state.conn == C_WF_BITMAP_T) {
3621 enum drbd_state_rv rv; 4281 enum drbd_state_rv rv;
3622 4282
3623 ok = !drbd_send_bitmap(mdev); 4283 err = drbd_send_bitmap(mdev);
3624 if (!ok) 4284 if (err)
3625 goto out; 4285 goto out;
3626 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */ 4286 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
3627 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); 4287 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
@@ -3632,47 +4292,40 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne
3632 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n", 4292 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
3633 drbd_conn_str(mdev->state.conn)); 4293 drbd_conn_str(mdev->state.conn));
3634 } 4294 }
4295 err = 0;
3635 4296
3636 ok = true;
3637 out: 4297 out:
3638 drbd_bm_unlock(mdev); 4298 drbd_bm_unlock(mdev);
3639 if (ok && mdev->state.conn == C_WF_BITMAP_S) 4299 if (!err && mdev->state.conn == C_WF_BITMAP_S)
3640 drbd_start_resync(mdev, C_SYNC_SOURCE); 4300 drbd_start_resync(mdev, C_SYNC_SOURCE);
3641 free_page((unsigned long) buffer); 4301 return err;
3642 return ok;
3643} 4302}
3644 4303
3645static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 4304static int receive_skip(struct drbd_tconn *tconn, struct packet_info *pi)
3646{ 4305{
3647 /* TODO zero copy sink :) */ 4306 conn_warn(tconn, "skipping unknown optional packet type %d, l: %d!\n",
3648 static char sink[128]; 4307 pi->cmd, pi->size);
3649 int size, want, r;
3650 4308
3651 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n", 4309 return ignore_remaining_packet(tconn, pi);
3652 cmd, data_size);
3653
3654 size = data_size;
3655 while (size > 0) {
3656 want = min_t(int, size, sizeof(sink));
3657 r = drbd_recv(mdev, sink, want);
3658 ERR_IF(r <= 0) break;
3659 size -= r;
3660 }
3661 return size == 0;
3662} 4310}
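Editor's note: receive_skip() now only logs and defers to ignore_remaining_packet(); the removed sink-buffer loop shows the underlying technique of draining a payload that is not needed, reading it into a small scratch buffer in chunks until the advertised length is consumed. Below is a hedged userspace sketch of that idea over a plain file descriptor; drain_payload is a made-up name, not a DRBD function.

#include <stddef.h>
#include <errno.h>
#include <unistd.h>

/* Read and discard size bytes from fd via a small scratch buffer.
 * Returns 0 when everything was consumed, -1 on EOF or error. */
static int drain_payload(int fd, size_t size)
{
	char sink[128];

	while (size > 0) {
		size_t want = size < sizeof(sink) ? size : sizeof(sink);
		ssize_t r = read(fd, sink, want);

		if (r < 0) {
			if (errno == EINTR)
				continue;	/* interrupted, try again */
			return -1;
		}
		if (r == 0)
			return -1;	/* peer closed the connection early */
		size -= (size_t)r;
	}
	return 0;
}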
3663 4311
3664static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 4312static int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi)
3665{ 4313{
3666 /* Make sure we've acked all the TCP data associated 4314 /* Make sure we've acked all the TCP data associated
3667 * with the data requests being unplugged */ 4315 * with the data requests being unplugged */
3668 drbd_tcp_quickack(mdev->data.socket); 4316 drbd_tcp_quickack(tconn->data.socket);
3669 4317
3670 return true; 4318 return 0;
3671} 4319}
3672 4320
3673static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) 4321static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi)
3674{ 4322{
3675 struct p_block_desc *p = &mdev->data.rbuf.block_desc; 4323 struct drbd_conf *mdev;
4324 struct p_block_desc *p = pi->data;
4325
4326 mdev = vnr_to_mdev(tconn, pi->vnr);
4327 if (!mdev)
4328 return -EIO;
3676 4329
3677 switch (mdev->state.conn) { 4330 switch (mdev->state.conn) {
3678 case C_WF_SYNC_UUID: 4331 case C_WF_SYNC_UUID:
@@ -3686,15 +4339,13 @@ static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, un
3686 4339
3687 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); 4340 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3688 4341
3689 return true; 4342 return 0;
3690} 4343}
3691 4344
3692typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive);
3693
3694struct data_cmd { 4345struct data_cmd {
3695 int expect_payload; 4346 int expect_payload;
3696 size_t pkt_size; 4347 size_t pkt_size;
3697 drbd_cmd_handler_f function; 4348 int (*fn)(struct drbd_tconn *, struct packet_info *);
3698}; 4349};
3699 4350
3700static struct data_cmd drbd_cmd_handler[] = { 4351static struct data_cmd drbd_cmd_handler[] = {
@@ -3702,13 +4353,13 @@ static struct data_cmd drbd_cmd_handler[] = {
3702 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply }, 4353 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
3703 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } , 4354 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
3704 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } , 4355 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
3705 [P_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap } , 4356 [P_BITMAP] = { 1, 0, receive_bitmap } ,
3706 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap } , 4357 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
3707 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header80), receive_UnplugRemote }, 4358 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
3708 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, 4359 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3709 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, 4360 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3710 [P_SYNC_PARAM] = { 1, sizeof(struct p_header80), receive_SyncParam }, 4361 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
3711 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header80), receive_SyncParam }, 4362 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
3712 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol }, 4363 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
3713 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids }, 4364 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
3714 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes }, 4365 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
@@ -3720,124 +4371,75 @@ static struct data_cmd drbd_cmd_handler[] = {
3720 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, 4371 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3721 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, 4372 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
3722 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, 4373 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
3723 /* anything missing from this table is in 4374 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
3724 * the asender_tbl, see get_asender_cmd */ 4375 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
3725 [P_MAX_CMD] = { 0, 0, NULL },
3726}; 4376};
3727 4377
3728/* All handler functions that expect a sub-header get that sub-heder in 4378static void drbdd(struct drbd_tconn *tconn)
3729 mdev->data.rbuf.header.head.payload.
3730
3731 Usually in mdev->data.rbuf.header.head the callback can find the usual
3732 p_header, but they may not rely on that. Since there is also p_header95 !
3733 */
3734
3735static void drbdd(struct drbd_conf *mdev)
3736{ 4379{
3737 union p_header *header = &mdev->data.rbuf.header; 4380 struct packet_info pi;
3738 unsigned int packet_size;
3739 enum drbd_packets cmd;
3740 size_t shs; /* sub header size */ 4381 size_t shs; /* sub header size */
3741 int rv; 4382 int err;
4383
4384 while (get_t_state(&tconn->receiver) == RUNNING) {
4385 struct data_cmd *cmd;
3742 4386
3743 while (get_t_state(&mdev->receiver) == Running) { 4387 drbd_thread_current_set_cpu(&tconn->receiver);
3744 drbd_thread_current_set_cpu(mdev); 4388 if (drbd_recv_header(tconn, &pi))
3745 if (!drbd_recv_header(mdev, &cmd, &packet_size))
3746 goto err_out; 4389 goto err_out;
3747 4390
3748 if (unlikely(cmd >= P_MAX_CMD || !drbd_cmd_handler[cmd].function)) { 4391 cmd = &drbd_cmd_handler[pi.cmd];
3749 dev_err(DEV, "unknown packet type %d, l: %d!\n", cmd, packet_size); 4392 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
4393 conn_err(tconn, "Unexpected data packet %s (0x%04x)",
4394 cmdname(pi.cmd), pi.cmd);
3750 goto err_out; 4395 goto err_out;
3751 } 4396 }
3752 4397
3753 shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header); 4398 shs = cmd->pkt_size;
3754 if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) { 4399 if (pi.size > shs && !cmd->expect_payload) {
3755 dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size); 4400 conn_err(tconn, "No payload expected %s l:%d\n",
4401 cmdname(pi.cmd), pi.size);
3756 goto err_out; 4402 goto err_out;
3757 } 4403 }
3758 4404
3759 if (shs) { 4405 if (shs) {
3760 rv = drbd_recv(mdev, &header->h80.payload, shs); 4406 err = drbd_recv_all_warn(tconn, pi.data, shs);
3761 if (unlikely(rv != shs)) { 4407 if (err)
3762 if (!signal_pending(current))
3763 dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv);
3764 goto err_out; 4408 goto err_out;
3765 } 4409 pi.size -= shs;
3766 } 4410 }
3767 4411
3768 rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs); 4412 err = cmd->fn(tconn, &pi);
3769 4413 if (err) {
3770 if (unlikely(!rv)) { 4414 conn_err(tconn, "error receiving %s, e: %d l: %d!\n",
3771 dev_err(DEV, "error receiving %s, l: %d!\n", 4415 cmdname(pi.cmd), err, pi.size);
3772 cmdname(cmd), packet_size);
3773 goto err_out; 4416 goto err_out;
3774 } 4417 }
3775 } 4418 }
4419 return;
3776 4420
3777 if (0) { 4421 err_out:
3778 err_out: 4422 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
3779 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
3780 }
3781 /* If we leave here, we probably want to update at least the
3782 * "Connected" indicator on stable storage. Do so explicitly here. */
3783 drbd_md_sync(mdev);
3784} 4423}
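Editor's note: the rewritten drbdd() loop dispatches through the drbd_cmd_handler table: look up the command, reject unknown packets and unexpected payloads, read the fixed sub-header, then call the handler. The same table-driven dispatch, reduced to a runnable userspace sketch; packet layout, command numbers and handler names below are invented for illustration.

#include <stdio.h>
#include <stddef.h>

struct pkt { unsigned cmd; unsigned size; void *data; };

struct cmd_entry {
	size_t hdr_size;		/* fixed part the dispatcher expects up front */
	int (*fn)(struct pkt *);	/* NULL marks an unsupported command */
};

static int handle_ping(struct pkt *p) { printf("ping, %u byte payload\n", p->size); return 0; }
static int handle_data(struct pkt *p) { printf("data, %u byte payload\n", p->size); return 0; }

static const struct cmd_entry table[] = {
	[0] = { 0, handle_ping },
	[1] = { 8, handle_data },
};

static int dispatch(struct pkt *p)
{
	if (p->cmd >= sizeof(table) / sizeof(table[0]) || !table[p->cmd].fn)
		return -1;	/* unexpected packet: treat as protocol error */
	if (p->size < table[p->cmd].hdr_size)
		return -1;	/* shorter than the fixed sub-header */
	return table[p->cmd].fn(p);
}

int main(void)
{
	struct pkt ping = { .cmd = 0, .size = 0 };
	struct pkt data = { .cmd = 1, .size = 16 };

	return dispatch(&ping) || dispatch(&data);
}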
3785 4424
3786void drbd_flush_workqueue(struct drbd_conf *mdev) 4425void conn_flush_workqueue(struct drbd_tconn *tconn)
3787{ 4426{
3788 struct drbd_wq_barrier barr; 4427 struct drbd_wq_barrier barr;
3789 4428
3790 barr.w.cb = w_prev_work_done; 4429 barr.w.cb = w_prev_work_done;
4430 barr.w.tconn = tconn;
3791 init_completion(&barr.done); 4431 init_completion(&barr.done);
3792 drbd_queue_work(&mdev->data.work, &barr.w); 4432 drbd_queue_work(&tconn->sender_work, &barr.w);
3793 wait_for_completion(&barr.done); 4433 wait_for_completion(&barr.done);
3794} 4434}
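Editor's note: conn_flush_workqueue() waits for all previously queued work by inserting a barrier item whose callback completes a completion, then blocking until that callback has run. The same pattern in a userspace sketch built on pthreads; struct work and the wq_push() hook are assumptions made for the example, not DRBD interfaces.

#include <pthread.h>
#include <stdbool.h>

struct work {
	void (*fn)(struct work *w);
	struct work *next;
};

struct flush_barrier {
	struct work w;
	pthread_mutex_t lock;
	pthread_cond_t cond;
	bool done;
};

static void flush_barrier_fn(struct work *w)
{
	struct flush_barrier *b = (struct flush_barrier *)w;

	pthread_mutex_lock(&b->lock);
	b->done = true;
	pthread_cond_signal(&b->cond);
	pthread_mutex_unlock(&b->lock);
}

/* wq_push() appends to the queue drained by a single worker thread; once the
 * worker reaches the barrier item, everything queued before it has finished. */
void flush_workqueue(void (*wq_push)(struct work *))
{
	struct flush_barrier b = {
		.w    = { .fn = flush_barrier_fn },
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cond = PTHREAD_COND_INITIALIZER,
		.done = false,
	};

	wq_push(&b.w);
	pthread_mutex_lock(&b.lock);
	while (!b.done)
		pthread_cond_wait(&b.cond, &b.lock);
	pthread_mutex_unlock(&b.lock);
}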
3795 4435
3796void drbd_free_tl_hash(struct drbd_conf *mdev) 4436static void conn_disconnect(struct drbd_tconn *tconn)
3797{
3798 struct hlist_head *h;
3799
3800 spin_lock_irq(&mdev->req_lock);
3801
3802 if (!mdev->tl_hash || mdev->state.conn != C_STANDALONE) {
3803 spin_unlock_irq(&mdev->req_lock);
3804 return;
3805 }
3806 /* paranoia code */
3807 for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++)
3808 if (h->first)
3809 dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n",
3810 (int)(h - mdev->ee_hash), h->first);
3811 kfree(mdev->ee_hash);
3812 mdev->ee_hash = NULL;
3813 mdev->ee_hash_s = 0;
3814
3815 /* We may not have had the chance to wait for all locally pending
3816 * application requests. The hlist_add_fake() prevents access after
3817 * free on master bio completion. */
3818 for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) {
3819 struct drbd_request *req;
3820 struct hlist_node *pos, *n;
3821 hlist_for_each_entry_safe(req, pos, n, h, collision) {
3822 hlist_del_init(&req->collision);
3823 hlist_add_fake(&req->collision);
3824 }
3825 }
3826
3827 kfree(mdev->tl_hash);
3828 mdev->tl_hash = NULL;
3829 mdev->tl_hash_s = 0;
3830 spin_unlock_irq(&mdev->req_lock);
3831}
3832
3833static void drbd_disconnect(struct drbd_conf *mdev)
3834{ 4437{
3835 enum drbd_fencing_p fp; 4438 struct drbd_conf *mdev;
3836 union drbd_state os, ns; 4439 enum drbd_conns oc;
3837 int rv = SS_UNKNOWN_ERROR; 4440 int vnr;
3838 unsigned int i;
3839 4441
3840 if (mdev->state.conn == C_STANDALONE) 4442 if (tconn->cstate == C_STANDALONE)
3841 return; 4443 return;
3842 4444
3843 /* We are about to start the cleanup after connection loss. 4445 /* We are about to start the cleanup after connection loss.
@@ -3845,18 +4447,54 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3845 * Usually we should be in some network failure state already, 4447 * Usually we should be in some network failure state already,
3846 * but just in case we are not, we fix it up here. 4448 * but just in case we are not, we fix it up here.
3847 */ 4449 */
3848 drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); 4450 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
3849 4451
3850 /* asender does not clean up anything. it must not interfere, either */ 4452 /* asender does not clean up anything. it must not interfere, either */
3851 drbd_thread_stop(&mdev->asender); 4453 drbd_thread_stop(&tconn->asender);
3852 drbd_free_sock(mdev); 4454 drbd_free_sock(tconn);
4455
4456 rcu_read_lock();
4457 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
4458 kref_get(&mdev->kref);
4459 rcu_read_unlock();
4460 drbd_disconnected(mdev);
4461 kref_put(&mdev->kref, &drbd_minor_destroy);
4462 rcu_read_lock();
4463 }
4464 rcu_read_unlock();
4465
4466 if (!list_empty(&tconn->current_epoch->list))
4467 conn_err(tconn, "ASSERTION FAILED: tconn->current_epoch->list not empty\n");
4468 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
4469 atomic_set(&tconn->current_epoch->epoch_size, 0);
4470 tconn->send.seen_any_write_yet = false;
4471
4472 conn_info(tconn, "Connection closed\n");
4473
4474 if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN)
4475 conn_try_outdate_peer_async(tconn);
4476
4477 spin_lock_irq(&tconn->req_lock);
4478 oc = tconn->cstate;
4479 if (oc >= C_UNCONNECTED)
4480 _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
4481
4482 spin_unlock_irq(&tconn->req_lock);
4483
4484 if (oc == C_DISCONNECTING)
4485 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
4486}
4487
4488static int drbd_disconnected(struct drbd_conf *mdev)
4489{
4490 unsigned int i;
3853 4491
3854 /* wait for current activity to cease. */ 4492 /* wait for current activity to cease. */
3855 spin_lock_irq(&mdev->req_lock); 4493 spin_lock_irq(&mdev->tconn->req_lock);
3856 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee); 4494 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
3857 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee); 4495 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
3858 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee); 4496 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
3859 spin_unlock_irq(&mdev->req_lock); 4497 spin_unlock_irq(&mdev->tconn->req_lock);
3860 4498
3861 /* We do not have data structures that would allow us to 4499 /* We do not have data structures that would allow us to
3862 * get the rs_pending_cnt down to 0 again. 4500 * get the rs_pending_cnt down to 0 again.
@@ -3874,7 +4512,6 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3874 atomic_set(&mdev->rs_pending_cnt, 0); 4512 atomic_set(&mdev->rs_pending_cnt, 0);
3875 wake_up(&mdev->misc_wait); 4513 wake_up(&mdev->misc_wait);
3876 4514
3877 /* make sure syncer is stopped and w_resume_next_sg queued */
3878 del_timer_sync(&mdev->resync_timer); 4515 del_timer_sync(&mdev->resync_timer);
3879 resync_timer_fn((unsigned long)mdev); 4516 resync_timer_fn((unsigned long)mdev);
3880 4517
@@ -3883,50 +4520,25 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3883 * to be "canceled" */ 4520 * to be "canceled" */
3884 drbd_flush_workqueue(mdev); 4521 drbd_flush_workqueue(mdev);
3885 4522
3886 /* This also does reclaim_net_ee(). If we do this too early, we might 4523 drbd_finish_peer_reqs(mdev);
3887 * miss some resync ee and pages.*/ 4524
3888 drbd_process_done_ee(mdev); 4525 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4526 might have issued a work again. The one before drbd_finish_peer_reqs() is
 4527 necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
4528 drbd_flush_workqueue(mdev);
4529
4530 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4531 * again via drbd_try_clear_on_disk_bm(). */
4532 drbd_rs_cancel_all(mdev);
3889 4533
3890 kfree(mdev->p_uuid); 4534 kfree(mdev->p_uuid);
3891 mdev->p_uuid = NULL; 4535 mdev->p_uuid = NULL;
3892 4536
3893 if (!is_susp(mdev->state)) 4537 if (!drbd_suspended(mdev))
3894 tl_clear(mdev); 4538 tl_clear(mdev->tconn);
3895
3896 dev_info(DEV, "Connection closed\n");
3897 4539
3898 drbd_md_sync(mdev); 4540 drbd_md_sync(mdev);
3899 4541
3900 fp = FP_DONT_CARE;
3901 if (get_ldev(mdev)) {
3902 fp = mdev->ldev->dc.fencing;
3903 put_ldev(mdev);
3904 }
3905
3906 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
3907 drbd_try_outdate_peer_async(mdev);
3908
3909 spin_lock_irq(&mdev->req_lock);
3910 os = mdev->state;
3911 if (os.conn >= C_UNCONNECTED) {
3912 /* Do not restart in case we are C_DISCONNECTING */
3913 ns = os;
3914 ns.conn = C_UNCONNECTED;
3915 rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
3916 }
3917 spin_unlock_irq(&mdev->req_lock);
3918
3919 if (os.conn == C_DISCONNECTING) {
3920 wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0);
3921
3922 crypto_free_hash(mdev->cram_hmac_tfm);
3923 mdev->cram_hmac_tfm = NULL;
3924
3925 kfree(mdev->net_conf);
3926 mdev->net_conf = NULL;
3927 drbd_request_state(mdev, NS(conn, C_STANDALONE));
3928 }
3929
3930 /* serialize with bitmap writeout triggered by the state change, 4542 /* serialize with bitmap writeout triggered by the state change,
3931 * if any. */ 4543 * if any. */
3932 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); 4544 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
@@ -3938,7 +4550,7 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3938 * Actually we don't care for exactly when the network stack does its 4550 * Actually we don't care for exactly when the network stack does its
3939 * put_page(), but release our reference on these pages right here. 4551 * put_page(), but release our reference on these pages right here.
3940 */ 4552 */
3941 i = drbd_release_ee(mdev, &mdev->net_ee); 4553 i = drbd_free_peer_reqs(mdev, &mdev->net_ee);
3942 if (i) 4554 if (i)
3943 dev_info(DEV, "net_ee not empty, killed %u entries\n", i); 4555 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
3944 i = atomic_read(&mdev->pp_in_use_by_net); 4556 i = atomic_read(&mdev->pp_in_use_by_net);
@@ -3953,9 +4565,7 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3953 D_ASSERT(list_empty(&mdev->sync_ee)); 4565 D_ASSERT(list_empty(&mdev->sync_ee));
3954 D_ASSERT(list_empty(&mdev->done_ee)); 4566 D_ASSERT(list_empty(&mdev->done_ee));
3955 4567
3956 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */ 4568 return 0;
3957 atomic_set(&mdev->current_epoch->epoch_size, 0);
3958 D_ASSERT(list_empty(&mdev->current_epoch->list));
3959} 4569}
3960 4570
3961/* 4571/*
@@ -3967,29 +4577,19 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3967 * 4577 *
3968 * for now, they are expected to be zero, but ignored. 4578 * for now, they are expected to be zero, but ignored.
3969 */ 4579 */
3970static int drbd_send_handshake(struct drbd_conf *mdev) 4580static int drbd_send_features(struct drbd_tconn *tconn)
3971{ 4581{
3972 /* ASSERT current == mdev->receiver ... */ 4582 struct drbd_socket *sock;
3973 struct p_handshake *p = &mdev->data.sbuf.handshake; 4583 struct p_connection_features *p;
3974 int ok;
3975
3976 if (mutex_lock_interruptible(&mdev->data.mutex)) {
3977 dev_err(DEV, "interrupted during initial handshake\n");
3978 return 0; /* interrupted. not ok. */
3979 }
3980
3981 if (mdev->data.socket == NULL) {
3982 mutex_unlock(&mdev->data.mutex);
3983 return 0;
3984 }
3985 4584
4585 sock = &tconn->data;
4586 p = conn_prepare_command(tconn, sock);
4587 if (!p)
4588 return -EIO;
3986 memset(p, 0, sizeof(*p)); 4589 memset(p, 0, sizeof(*p));
3987 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); 4590 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
3988 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); 4591 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
3989 ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE, 4592 return conn_send_command(tconn, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
3990 (struct p_header80 *)p, sizeof(*p), 0 );
3991 mutex_unlock(&mdev->data.mutex);
3992 return ok;
3993} 4593}
3994 4594
3995/* 4595/*
@@ -3999,42 +4599,38 @@ static int drbd_send_handshake(struct drbd_conf *mdev)
3999 * -1 peer talks different language, 4599 * -1 peer talks different language,
4000 * no point in trying again, please go standalone. 4600 * no point in trying again, please go standalone.
4001 */ 4601 */
4002static int drbd_do_handshake(struct drbd_conf *mdev) 4602static int drbd_do_features(struct drbd_tconn *tconn)
4003{ 4603{
4004 /* ASSERT current == mdev->receiver ... */ 4604 /* ASSERT current == tconn->receiver ... */
4005 struct p_handshake *p = &mdev->data.rbuf.handshake; 4605 struct p_connection_features *p;
4006 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); 4606 const int expect = sizeof(struct p_connection_features);
4007 unsigned int length; 4607 struct packet_info pi;
4008 enum drbd_packets cmd; 4608 int err;
4009 int rv;
4010 4609
4011 rv = drbd_send_handshake(mdev); 4610 err = drbd_send_features(tconn);
4012 if (!rv) 4611 if (err)
4013 return 0; 4612 return 0;
4014 4613
4015 rv = drbd_recv_header(mdev, &cmd, &length); 4614 err = drbd_recv_header(tconn, &pi);
4016 if (!rv) 4615 if (err)
4017 return 0; 4616 return 0;
4018 4617
4019 if (cmd != P_HAND_SHAKE) { 4618 if (pi.cmd != P_CONNECTION_FEATURES) {
4020 dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n", 4619 conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
4021 cmdname(cmd), cmd); 4620 cmdname(pi.cmd), pi.cmd);
4022 return -1; 4621 return -1;
4023 } 4622 }
4024 4623
4025 if (length != expect) { 4624 if (pi.size != expect) {
4026 dev_err(DEV, "expected HandShake length: %u, received: %u\n", 4625 conn_err(tconn, "expected ConnectionFeatures length: %u, received: %u\n",
4027 expect, length); 4626 expect, pi.size);
4028 return -1; 4627 return -1;
4029 } 4628 }
4030 4629
4031 rv = drbd_recv(mdev, &p->head.payload, expect); 4630 p = pi.data;
4032 4631 err = drbd_recv_all_warn(tconn, p, expect);
4033 if (rv != expect) { 4632 if (err)
4034 if (!signal_pending(current))
4035 dev_warn(DEV, "short read receiving handshake packet: l=%u\n", rv);
4036 return 0; 4633 return 0;
4037 }
4038 4634
4039 p->protocol_min = be32_to_cpu(p->protocol_min); 4635 p->protocol_min = be32_to_cpu(p->protocol_min);
4040 p->protocol_max = be32_to_cpu(p->protocol_max); 4636 p->protocol_max = be32_to_cpu(p->protocol_max);
@@ -4045,15 +4641,15 @@ static int drbd_do_handshake(struct drbd_conf *mdev)
4045 PRO_VERSION_MIN > p->protocol_max) 4641 PRO_VERSION_MIN > p->protocol_max)
4046 goto incompat; 4642 goto incompat;
4047 4643
4048 mdev->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); 4644 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
4049 4645
4050 dev_info(DEV, "Handshake successful: " 4646 conn_info(tconn, "Handshake successful: "
4051 "Agreed network protocol version %d\n", mdev->agreed_pro_version); 4647 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
4052 4648
4053 return 1; 4649 return 1;
4054 4650
4055 incompat: 4651 incompat:
4056 dev_err(DEV, "incompatible DRBD dialects: " 4652 conn_err(tconn, "incompatible DRBD dialects: "
4057 "I support %d-%d, peer supports %d-%d\n", 4653 "I support %d-%d, peer supports %d-%d\n",
4058 PRO_VERSION_MIN, PRO_VERSION_MAX, 4654 PRO_VERSION_MIN, PRO_VERSION_MAX,
4059 p->protocol_min, p->protocol_max); 4655 p->protocol_min, p->protocol_max);
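Editor's note: the negotiation rule itself is unchanged by the rename to drbd_do_features(): the connection is refused unless the local [PRO_VERSION_MIN, PRO_VERSION_MAX] range overlaps the peer's range, and the agreed version is the smaller of the two maxima. A small sketch of that rule; the version numbers used here are made up.

#include <stdio.h>

#define MY_VERSION_MIN 86
#define MY_VERSION_MAX 96

/* Returns the agreed protocol version, or -1 if the ranges do not overlap. */
static int negotiate(int peer_min, int peer_max)
{
	if (MY_VERSION_MAX < peer_min || MY_VERSION_MIN > peer_max)
		return -1;
	return MY_VERSION_MAX < peer_max ? MY_VERSION_MAX : peer_max;
}

int main(void)
{
	printf("peer 90-100 -> %d\n", negotiate(90, 100));	/* agrees on 96 */
	printf("peer 97-100 -> %d\n", negotiate(97, 100));	/* no overlap: -1 */
	return 0;
}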
@@ -4061,7 +4657,7 @@ static int drbd_do_handshake(struct drbd_conf *mdev)
4061} 4657}
4062 4658
4063#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE) 4659#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
4064static int drbd_do_auth(struct drbd_conf *mdev) 4660static int drbd_do_auth(struct drbd_tconn *tconn)
4065{ 4661{
4066 dev_err(DEV, "This kernel was built without CONFIG_CRYPTO_HMAC.\n"); 4662 dev_err(DEV, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
4067 dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n"); 4663 dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
@@ -4076,121 +4672,139 @@ static int drbd_do_auth(struct drbd_conf *mdev)
4076 -1 - auth failed, don't try again. 4672 -1 - auth failed, don't try again.
4077*/ 4673*/
4078 4674
4079static int drbd_do_auth(struct drbd_conf *mdev) 4675static int drbd_do_auth(struct drbd_tconn *tconn)
4080{ 4676{
4677 struct drbd_socket *sock;
4081 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */ 4678 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4082 struct scatterlist sg; 4679 struct scatterlist sg;
4083 char *response = NULL; 4680 char *response = NULL;
4084 char *right_response = NULL; 4681 char *right_response = NULL;
4085 char *peers_ch = NULL; 4682 char *peers_ch = NULL;
4086 unsigned int key_len = strlen(mdev->net_conf->shared_secret); 4683 unsigned int key_len;
4684 char secret[SHARED_SECRET_MAX]; /* 64 byte */
4087 unsigned int resp_size; 4685 unsigned int resp_size;
4088 struct hash_desc desc; 4686 struct hash_desc desc;
4089 enum drbd_packets cmd; 4687 struct packet_info pi;
4090 unsigned int length; 4688 struct net_conf *nc;
4091 int rv; 4689 int err, rv;
4690
4691 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
4092 4692
4093 desc.tfm = mdev->cram_hmac_tfm; 4693 rcu_read_lock();
4694 nc = rcu_dereference(tconn->net_conf);
4695 key_len = strlen(nc->shared_secret);
4696 memcpy(secret, nc->shared_secret, key_len);
4697 rcu_read_unlock();
4698
4699 desc.tfm = tconn->cram_hmac_tfm;
4094 desc.flags = 0; 4700 desc.flags = 0;
4095 4701
4096 rv = crypto_hash_setkey(mdev->cram_hmac_tfm, 4702 rv = crypto_hash_setkey(tconn->cram_hmac_tfm, (u8 *)secret, key_len);
4097 (u8 *)mdev->net_conf->shared_secret, key_len);
4098 if (rv) { 4703 if (rv) {
4099 dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv); 4704 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
4100 rv = -1; 4705 rv = -1;
4101 goto fail; 4706 goto fail;
4102 } 4707 }
4103 4708
4104 get_random_bytes(my_challenge, CHALLENGE_LEN); 4709 get_random_bytes(my_challenge, CHALLENGE_LEN);
4105 4710
4106 rv = drbd_send_cmd2(mdev, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN); 4711 sock = &tconn->data;
4712 if (!conn_prepare_command(tconn, sock)) {
4713 rv = 0;
4714 goto fail;
4715 }
4716 rv = !conn_send_command(tconn, sock, P_AUTH_CHALLENGE, 0,
4717 my_challenge, CHALLENGE_LEN);
4107 if (!rv) 4718 if (!rv)
4108 goto fail; 4719 goto fail;
4109 4720
4110 rv = drbd_recv_header(mdev, &cmd, &length); 4721 err = drbd_recv_header(tconn, &pi);
4111 if (!rv) 4722 if (err) {
4723 rv = 0;
4112 goto fail; 4724 goto fail;
4725 }
4113 4726
4114 if (cmd != P_AUTH_CHALLENGE) { 4727 if (pi.cmd != P_AUTH_CHALLENGE) {
4115 dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n", 4728 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
4116 cmdname(cmd), cmd); 4729 cmdname(pi.cmd), pi.cmd);
4117 rv = 0; 4730 rv = 0;
4118 goto fail; 4731 goto fail;
4119 } 4732 }
4120 4733
4121 if (length > CHALLENGE_LEN * 2) { 4734 if (pi.size > CHALLENGE_LEN * 2) {
4122 dev_err(DEV, "expected AuthChallenge payload too big.\n"); 4735 conn_err(tconn, "expected AuthChallenge payload too big.\n");
4123 rv = -1; 4736 rv = -1;
4124 goto fail; 4737 goto fail;
4125 } 4738 }
4126 4739
4127 peers_ch = kmalloc(length, GFP_NOIO); 4740 peers_ch = kmalloc(pi.size, GFP_NOIO);
4128 if (peers_ch == NULL) { 4741 if (peers_ch == NULL) {
4129 dev_err(DEV, "kmalloc of peers_ch failed\n"); 4742 conn_err(tconn, "kmalloc of peers_ch failed\n");
4130 rv = -1; 4743 rv = -1;
4131 goto fail; 4744 goto fail;
4132 } 4745 }
4133 4746
4134 rv = drbd_recv(mdev, peers_ch, length); 4747 err = drbd_recv_all_warn(tconn, peers_ch, pi.size);
4135 4748 if (err) {
4136 if (rv != length) {
4137 if (!signal_pending(current))
4138 dev_warn(DEV, "short read AuthChallenge: l=%u\n", rv);
4139 rv = 0; 4749 rv = 0;
4140 goto fail; 4750 goto fail;
4141 } 4751 }
4142 4752
4143 resp_size = crypto_hash_digestsize(mdev->cram_hmac_tfm); 4753 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
4144 response = kmalloc(resp_size, GFP_NOIO); 4754 response = kmalloc(resp_size, GFP_NOIO);
4145 if (response == NULL) { 4755 if (response == NULL) {
4146 dev_err(DEV, "kmalloc of response failed\n"); 4756 conn_err(tconn, "kmalloc of response failed\n");
4147 rv = -1; 4757 rv = -1;
4148 goto fail; 4758 goto fail;
4149 } 4759 }
4150 4760
4151 sg_init_table(&sg, 1); 4761 sg_init_table(&sg, 1);
4152 sg_set_buf(&sg, peers_ch, length); 4762 sg_set_buf(&sg, peers_ch, pi.size);
4153 4763
4154 rv = crypto_hash_digest(&desc, &sg, sg.length, response); 4764 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4155 if (rv) { 4765 if (rv) {
4156 dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv); 4766 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
4157 rv = -1; 4767 rv = -1;
4158 goto fail; 4768 goto fail;
4159 } 4769 }
4160 4770
4161 rv = drbd_send_cmd2(mdev, P_AUTH_RESPONSE, response, resp_size); 4771 if (!conn_prepare_command(tconn, sock)) {
4162 if (!rv) 4772 rv = 0;
4163 goto fail; 4773 goto fail;
4164 4774 }
4165 rv = drbd_recv_header(mdev, &cmd, &length); 4775 rv = !conn_send_command(tconn, sock, P_AUTH_RESPONSE, 0,
4776 response, resp_size);
4166 if (!rv) 4777 if (!rv)
4167 goto fail; 4778 goto fail;
4168 4779
4169 if (cmd != P_AUTH_RESPONSE) { 4780 err = drbd_recv_header(tconn, &pi);
4170 dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n", 4781 if (err) {
4171 cmdname(cmd), cmd);
4172 rv = 0; 4782 rv = 0;
4173 goto fail; 4783 goto fail;
4174 } 4784 }
4175 4785
4176 if (length != resp_size) { 4786 if (pi.cmd != P_AUTH_RESPONSE) {
4177 dev_err(DEV, "expected AuthResponse payload of wrong size\n"); 4787 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
4788 cmdname(pi.cmd), pi.cmd);
4178 rv = 0; 4789 rv = 0;
4179 goto fail; 4790 goto fail;
4180 } 4791 }
4181 4792
4182 rv = drbd_recv(mdev, response , resp_size); 4793 if (pi.size != resp_size) {
4794 conn_err(tconn, "expected AuthResponse payload of wrong size\n");
4795 rv = 0;
4796 goto fail;
4797 }
4183 4798
4184 if (rv != resp_size) { 4799 err = drbd_recv_all_warn(tconn, response , resp_size);
4185 if (!signal_pending(current)) 4800 if (err) {
4186 dev_warn(DEV, "short read receiving AuthResponse: l=%u\n", rv);
4187 rv = 0; 4801 rv = 0;
4188 goto fail; 4802 goto fail;
4189 } 4803 }
4190 4804
4191 right_response = kmalloc(resp_size, GFP_NOIO); 4805 right_response = kmalloc(resp_size, GFP_NOIO);
4192 if (right_response == NULL) { 4806 if (right_response == NULL) {
4193 dev_err(DEV, "kmalloc of right_response failed\n"); 4807 conn_err(tconn, "kmalloc of right_response failed\n");
4194 rv = -1; 4808 rv = -1;
4195 goto fail; 4809 goto fail;
4196 } 4810 }
@@ -4199,7 +4813,7 @@ static int drbd_do_auth(struct drbd_conf *mdev)
4199 4813
4200 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response); 4814 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4201 if (rv) { 4815 if (rv) {
4202 dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv); 4816 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
4203 rv = -1; 4817 rv = -1;
4204 goto fail; 4818 goto fail;
4205 } 4819 }
@@ -4207,8 +4821,8 @@ static int drbd_do_auth(struct drbd_conf *mdev)
4207 rv = !memcmp(response, right_response, resp_size); 4821 rv = !memcmp(response, right_response, resp_size);
4208 4822
4209 if (rv) 4823 if (rv)
4210 dev_info(DEV, "Peer authenticated using %d bytes of '%s' HMAC\n", 4824 conn_info(tconn, "Peer authenticated using %d bytes HMAC\n",
4211 resp_size, mdev->net_conf->cram_hmac_alg); 4825 resp_size);
4212 else 4826 else
4213 rv = -1; 4827 rv = -1;
4214 4828
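Editor's note: drbd_do_auth() is a symmetric challenge/response scheme: each side sends a random challenge, answers the peer's challenge with HMAC(shared secret, challenge), and compares the response it receives against the digest it computed itself. The verification step, as a compact hedged sketch; hmac() below is a stand-in for whatever HMAC primitive is available and is not a real API.

#include <string.h>
#include <stddef.h>

#define DIGEST_LEN 32

/* Assumed helper: writes HMAC(key, msg) into out[DIGEST_LEN]. */
void hmac(const void *key, size_t key_len,
	  const void *msg, size_t msg_len, unsigned char out[DIGEST_LEN]);

/* Check the peer's response to our challenge.  Returns 1 on success, 0 on mismatch. */
static int verify_response(const void *secret, size_t secret_len,
			   const void *my_challenge, size_t challenge_len,
			   const unsigned char response[DIGEST_LEN])
{
	unsigned char expected[DIGEST_LEN];

	hmac(secret, secret_len, my_challenge, challenge_len, expected);
	return memcmp(response, expected, DIGEST_LEN) == 0;
}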
@@ -4223,82 +4837,106 @@ static int drbd_do_auth(struct drbd_conf *mdev)
4223 4837
4224int drbdd_init(struct drbd_thread *thi) 4838int drbdd_init(struct drbd_thread *thi)
4225{ 4839{
4226 struct drbd_conf *mdev = thi->mdev; 4840 struct drbd_tconn *tconn = thi->tconn;
4227 unsigned int minor = mdev_to_minor(mdev);
4228 int h; 4841 int h;
4229 4842
4230 sprintf(current->comm, "drbd%d_receiver", minor); 4843 conn_info(tconn, "receiver (re)started\n");
4231
4232 dev_info(DEV, "receiver (re)started\n");
4233 4844
4234 do { 4845 do {
4235 h = drbd_connect(mdev); 4846 h = conn_connect(tconn);
4236 if (h == 0) { 4847 if (h == 0) {
4237 drbd_disconnect(mdev); 4848 conn_disconnect(tconn);
4238 schedule_timeout_interruptible(HZ); 4849 schedule_timeout_interruptible(HZ);
4239 } 4850 }
4240 if (h == -1) { 4851 if (h == -1) {
4241 dev_warn(DEV, "Discarding network configuration.\n"); 4852 conn_warn(tconn, "Discarding network configuration.\n");
4242 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 4853 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
4243 } 4854 }
4244 } while (h == 0); 4855 } while (h == 0);
4245 4856
4246 if (h > 0) { 4857 if (h > 0)
4247 if (get_net_conf(mdev)) { 4858 drbdd(tconn);
4248 drbdd(mdev);
4249 put_net_conf(mdev);
4250 }
4251 }
4252 4859
4253 drbd_disconnect(mdev); 4860 conn_disconnect(tconn);
4254 4861
4255 dev_info(DEV, "receiver terminated\n"); 4862 conn_info(tconn, "receiver terminated\n");
4256 return 0; 4863 return 0;
4257} 4864}
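Editor's note: drbdd_init() retries conn_connect() in a loop: 0 means a transient failure (pause briefly and try again), -1 means the network configuration should be discarded, and a positive value means the connection is up and the receive loop can run. The retry policy, reduced to a sketch; connect_once() is an invented placeholder.

#include <unistd.h>

/* Assumed helper: >0 connected, 0 transient failure, -1 fatal. */
int connect_once(void);

static int connect_with_retry(void)
{
	int h;

	do {
		h = connect_once();
		if (h == 0)
			sleep(1);	/* transient failure: pause, then retry */
	} while (h == 0);

	return h;	/* >0: connected; -1: caller drops the configuration */
}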
4258 4865
4259/* ********* acknowledge sender ******** */ 4866/* ********* acknowledge sender ******** */
4260 4867
4261static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h) 4868static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
4262{ 4869{
4263 struct p_req_state_reply *p = (struct p_req_state_reply *)h; 4870 struct p_req_state_reply *p = pi->data;
4871 int retcode = be32_to_cpu(p->retcode);
4872
4873 if (retcode >= SS_SUCCESS) {
4874 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4875 } else {
4876 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4877 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4878 drbd_set_st_err_str(retcode), retcode);
4879 }
4880 wake_up(&tconn->ping_wait);
4881
4882 return 0;
4883}
4264 4884
4885static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
4886{
4887 struct drbd_conf *mdev;
4888 struct p_req_state_reply *p = pi->data;
4265 int retcode = be32_to_cpu(p->retcode); 4889 int retcode = be32_to_cpu(p->retcode);
4266 4890
4891 mdev = vnr_to_mdev(tconn, pi->vnr);
4892 if (!mdev)
4893 return -EIO;
4894
4895 if (test_bit(CONN_WD_ST_CHG_REQ, &tconn->flags)) {
4896 D_ASSERT(tconn->agreed_pro_version < 100);
4897 return got_conn_RqSReply(tconn, pi);
4898 }
4899
4267 if (retcode >= SS_SUCCESS) { 4900 if (retcode >= SS_SUCCESS) {
4268 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); 4901 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4269 } else { 4902 } else {
4270 set_bit(CL_ST_CHG_FAIL, &mdev->flags); 4903 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4271 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n", 4904 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4272 drbd_set_st_err_str(retcode), retcode); 4905 drbd_set_st_err_str(retcode), retcode);
4273 } 4906 }
4274 wake_up(&mdev->state_wait); 4907 wake_up(&mdev->state_wait);
4275 4908
4276 return true; 4909 return 0;
4277} 4910}
4278 4911
4279static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) 4912static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi)
4280{ 4913{
4281 return drbd_send_ping_ack(mdev); 4914 return drbd_send_ping_ack(tconn);
4282 4915
4283} 4916}
4284 4917
4285static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) 4918static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi)
4286{ 4919{
4287 /* restore idle timeout */ 4920 /* restore idle timeout */
4288 mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; 4921 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4289 if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) 4922 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4290 wake_up(&mdev->misc_wait); 4923 wake_up(&tconn->ping_wait);
4291 4924
4292 return true; 4925 return 0;
4293} 4926}
4294 4927
4295static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) 4928static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi)
4296{ 4929{
4297 struct p_block_ack *p = (struct p_block_ack *)h; 4930 struct drbd_conf *mdev;
4931 struct p_block_ack *p = pi->data;
4298 sector_t sector = be64_to_cpu(p->sector); 4932 sector_t sector = be64_to_cpu(p->sector);
4299 int blksize = be32_to_cpu(p->blksize); 4933 int blksize = be32_to_cpu(p->blksize);
4300 4934
4301 D_ASSERT(mdev->agreed_pro_version >= 89); 4935 mdev = vnr_to_mdev(tconn, pi->vnr);
4936 if (!mdev)
4937 return -EIO;
4938
4939 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
4302 4940
4303 update_peer_seq(mdev, be32_to_cpu(p->seq_num)); 4941 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4304 4942
@@ -4312,162 +4950,139 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h)
4312 dec_rs_pending(mdev); 4950 dec_rs_pending(mdev);
4313 atomic_add(blksize >> 9, &mdev->rs_sect_in); 4951 atomic_add(blksize >> 9, &mdev->rs_sect_in);
4314 4952
4315 return true; 4953 return 0;
4316}
4317
4318/* when we receive the ACK for a write request,
4319 * verify that we actually know about it */
4320static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev,
4321 u64 id, sector_t sector)
4322{
4323 struct hlist_head *slot = tl_hash_slot(mdev, sector);
4324 struct hlist_node *n;
4325 struct drbd_request *req;
4326
4327 hlist_for_each_entry(req, n, slot, collision) {
4328 if ((unsigned long)req == (unsigned long)id) {
4329 if (req->sector != sector) {
4330 dev_err(DEV, "_ack_id_to_req: found req %p but it has "
4331 "wrong sector (%llus versus %llus)\n", req,
4332 (unsigned long long)req->sector,
4333 (unsigned long long)sector);
4334 break;
4335 }
4336 return req;
4337 }
4338 }
4339 return NULL;
4340} 4954}
4341 4955
4342typedef struct drbd_request *(req_validator_fn) 4956static int
4343 (struct drbd_conf *mdev, u64 id, sector_t sector); 4957validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4344 4958 struct rb_root *root, const char *func,
4345static int validate_req_change_req_state(struct drbd_conf *mdev, 4959 enum drbd_req_event what, bool missing_ok)
4346 u64 id, sector_t sector, req_validator_fn validator,
4347 const char *func, enum drbd_req_event what)
4348{ 4960{
4349 struct drbd_request *req; 4961 struct drbd_request *req;
4350 struct bio_and_error m; 4962 struct bio_and_error m;
4351 4963
4352 spin_lock_irq(&mdev->req_lock); 4964 spin_lock_irq(&mdev->tconn->req_lock);
4353 req = validator(mdev, id, sector); 4965 req = find_request(mdev, root, id, sector, missing_ok, func);
4354 if (unlikely(!req)) { 4966 if (unlikely(!req)) {
4355 spin_unlock_irq(&mdev->req_lock); 4967 spin_unlock_irq(&mdev->tconn->req_lock);
4356 4968 return -EIO;
4357 dev_err(DEV, "%s: failed to find req %p, sector %llus\n", func,
4358 (void *)(unsigned long)id, (unsigned long long)sector);
4359 return false;
4360 } 4969 }
4361 __req_mod(req, what, &m); 4970 __req_mod(req, what, &m);
4362 spin_unlock_irq(&mdev->req_lock); 4971 spin_unlock_irq(&mdev->tconn->req_lock);
4363 4972
4364 if (m.bio) 4973 if (m.bio)
4365 complete_master_bio(mdev, &m); 4974 complete_master_bio(mdev, &m);
4366 return true; 4975 return 0;
4367} 4976}
4368 4977
4369static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) 4978static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi)
4370{ 4979{
4371 struct p_block_ack *p = (struct p_block_ack *)h; 4980 struct drbd_conf *mdev;
4981 struct p_block_ack *p = pi->data;
4372 sector_t sector = be64_to_cpu(p->sector); 4982 sector_t sector = be64_to_cpu(p->sector);
4373 int blksize = be32_to_cpu(p->blksize); 4983 int blksize = be32_to_cpu(p->blksize);
4374 enum drbd_req_event what; 4984 enum drbd_req_event what;
4375 4985
4986 mdev = vnr_to_mdev(tconn, pi->vnr);
4987 if (!mdev)
4988 return -EIO;
4989
4376 update_peer_seq(mdev, be32_to_cpu(p->seq_num)); 4990 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4377 4991
4378 if (is_syncer_block_id(p->block_id)) { 4992 if (p->block_id == ID_SYNCER) {
4379 drbd_set_in_sync(mdev, sector, blksize); 4993 drbd_set_in_sync(mdev, sector, blksize);
4380 dec_rs_pending(mdev); 4994 dec_rs_pending(mdev);
4381 return true; 4995 return 0;
4382 } 4996 }
4383 switch (be16_to_cpu(h->command)) { 4997 switch (pi->cmd) {
4384 case P_RS_WRITE_ACK: 4998 case P_RS_WRITE_ACK:
4385 D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); 4999 what = WRITE_ACKED_BY_PEER_AND_SIS;
4386 what = write_acked_by_peer_and_sis;
4387 break; 5000 break;
4388 case P_WRITE_ACK: 5001 case P_WRITE_ACK:
4389 D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); 5002 what = WRITE_ACKED_BY_PEER;
4390 what = write_acked_by_peer;
4391 break; 5003 break;
4392 case P_RECV_ACK: 5004 case P_RECV_ACK:
4393 D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_B); 5005 what = RECV_ACKED_BY_PEER;
4394 what = recv_acked_by_peer;
4395 break; 5006 break;
4396 case P_DISCARD_ACK: 5007 case P_SUPERSEDED:
4397 D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); 5008 what = CONFLICT_RESOLVED;
4398 what = conflict_discarded_by_peer; 5009 break;
5010 case P_RETRY_WRITE:
5011 what = POSTPONE_WRITE;
4399 break; 5012 break;
4400 default: 5013 default:
4401 D_ASSERT(0); 5014 BUG();
4402 return false;
4403 } 5015 }
4404 5016
4405 return validate_req_change_req_state(mdev, p->block_id, sector, 5017 return validate_req_change_req_state(mdev, p->block_id, sector,
4406 _ack_id_to_req, __func__ , what); 5018 &mdev->write_requests, __func__,
5019 what, false);
4407} 5020}
4408 5021
4409static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) 5022static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi)
4410{ 5023{
4411 struct p_block_ack *p = (struct p_block_ack *)h; 5024 struct drbd_conf *mdev;
5025 struct p_block_ack *p = pi->data;
4412 sector_t sector = be64_to_cpu(p->sector); 5026 sector_t sector = be64_to_cpu(p->sector);
4413 int size = be32_to_cpu(p->blksize); 5027 int size = be32_to_cpu(p->blksize);
4414 struct drbd_request *req; 5028 int err;
4415 struct bio_and_error m; 5029
5030 mdev = vnr_to_mdev(tconn, pi->vnr);
5031 if (!mdev)
5032 return -EIO;
4416 5033
4417 update_peer_seq(mdev, be32_to_cpu(p->seq_num)); 5034 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4418 5035
4419 if (is_syncer_block_id(p->block_id)) { 5036 if (p->block_id == ID_SYNCER) {
4420 dec_rs_pending(mdev); 5037 dec_rs_pending(mdev);
4421 drbd_rs_failed_io(mdev, sector, size); 5038 drbd_rs_failed_io(mdev, sector, size);
4422 return true; 5039 return 0;
4423 } 5040 }
4424 5041
4425 spin_lock_irq(&mdev->req_lock); 5042 err = validate_req_change_req_state(mdev, p->block_id, sector,
4426 req = _ack_id_to_req(mdev, p->block_id, sector); 5043 &mdev->write_requests, __func__,
4427 if (!req) { 5044 NEG_ACKED, true);
4428 spin_unlock_irq(&mdev->req_lock); 5045 if (err) {
4429 if (mdev->net_conf->wire_protocol == DRBD_PROT_A || 5046 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4430 mdev->net_conf->wire_protocol == DRBD_PROT_B) { 5047 The master bio might already be completed, therefore the
4431 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. 5048 request is no longer in the collision hash. */
4432 The master bio might already be completed, therefore the 5049 /* In Protocol B we might already have got a P_RECV_ACK
4433 request is no longer in the collision hash. 5050 but then get a P_NEG_ACK afterwards. */
4434 => Do not try to validate block_id as request. */ 5051 drbd_set_out_of_sync(mdev, sector, size);
4435 /* In Protocol B we might already have got a P_RECV_ACK
4436 but then get a P_NEG_ACK after wards. */
4437 drbd_set_out_of_sync(mdev, sector, size);
4438 return true;
4439 } else {
4440 dev_err(DEV, "%s: failed to find req %p, sector %llus\n", __func__,
4441 (void *)(unsigned long)p->block_id, (unsigned long long)sector);
4442 return false;
4443 }
4444 } 5052 }
4445 __req_mod(req, neg_acked, &m); 5053 return 0;
4446 spin_unlock_irq(&mdev->req_lock);
4447
4448 if (m.bio)
4449 complete_master_bio(mdev, &m);
4450 return true;
4451} 5054}
4452 5055
4453static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) 5056static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi)
4454{ 5057{
4455 struct p_block_ack *p = (struct p_block_ack *)h; 5058 struct drbd_conf *mdev;
5059 struct p_block_ack *p = pi->data;
4456 sector_t sector = be64_to_cpu(p->sector); 5060 sector_t sector = be64_to_cpu(p->sector);
4457 5061
5062 mdev = vnr_to_mdev(tconn, pi->vnr);
5063 if (!mdev)
5064 return -EIO;
5065
4458 update_peer_seq(mdev, be32_to_cpu(p->seq_num)); 5066 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4459 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n", 5067
5068 dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n",
4460 (unsigned long long)sector, be32_to_cpu(p->blksize)); 5069 (unsigned long long)sector, be32_to_cpu(p->blksize));
4461 5070
4462 return validate_req_change_req_state(mdev, p->block_id, sector, 5071 return validate_req_change_req_state(mdev, p->block_id, sector,
4463 _ar_id_to_req, __func__ , neg_acked); 5072 &mdev->read_requests, __func__,
5073 NEG_ACKED, false);
4464} 5074}
4465 5075
4466static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) 5076static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi)
4467{ 5077{
5078 struct drbd_conf *mdev;
4468 sector_t sector; 5079 sector_t sector;
4469 int size; 5080 int size;
4470 struct p_block_ack *p = (struct p_block_ack *)h; 5081 struct p_block_ack *p = pi->data;
5082
5083 mdev = vnr_to_mdev(tconn, pi->vnr);
5084 if (!mdev)
5085 return -EIO;
4471 5086
4472 sector = be64_to_cpu(p->sector); 5087 sector = be64_to_cpu(p->sector);
4473 size = be32_to_cpu(p->blksize); 5088 size = be32_to_cpu(p->blksize);
@@ -4478,57 +5093,66 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h)
4478 5093
4479 if (get_ldev_if_state(mdev, D_FAILED)) { 5094 if (get_ldev_if_state(mdev, D_FAILED)) {
4480 drbd_rs_complete_io(mdev, sector); 5095 drbd_rs_complete_io(mdev, sector);
4481 switch (be16_to_cpu(h->command)) { 5096 switch (pi->cmd) {
4482 case P_NEG_RS_DREPLY: 5097 case P_NEG_RS_DREPLY:
4483 drbd_rs_failed_io(mdev, sector, size); 5098 drbd_rs_failed_io(mdev, sector, size);
4484 case P_RS_CANCEL: 5099 case P_RS_CANCEL:
4485 break; 5100 break;
4486 default: 5101 default:
4487 D_ASSERT(0); 5102 BUG();
4488 put_ldev(mdev);
4489 return false;
4490 } 5103 }
4491 put_ldev(mdev); 5104 put_ldev(mdev);
4492 } 5105 }
4493 5106
4494 return true; 5107 return 0;
4495} 5108}
4496 5109
4497static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) 5110static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi)
4498{ 5111{
4499 struct p_barrier_ack *p = (struct p_barrier_ack *)h; 5112 struct p_barrier_ack *p = pi->data;
4500 5113 struct drbd_conf *mdev;
4501 tl_release(mdev, p->barrier, be32_to_cpu(p->set_size)); 5114 int vnr;
4502 5115
4503 if (mdev->state.conn == C_AHEAD && 5116 tl_release(tconn, p->barrier, be32_to_cpu(p->set_size));
4504 atomic_read(&mdev->ap_in_flight) == 0 && 5117
4505 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) { 5118 rcu_read_lock();
4506 mdev->start_resync_timer.expires = jiffies + HZ; 5119 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
4507 add_timer(&mdev->start_resync_timer); 5120 if (mdev->state.conn == C_AHEAD &&
5121 atomic_read(&mdev->ap_in_flight) == 0 &&
5122 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) {
5123 mdev->start_resync_timer.expires = jiffies + HZ;
5124 add_timer(&mdev->start_resync_timer);
5125 }
4508 } 5126 }
5127 rcu_read_unlock();
4509 5128
4510 return true; 5129 return 0;
4511} 5130}
4512 5131
4513static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) 5132static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi)
4514{ 5133{
4515 struct p_block_ack *p = (struct p_block_ack *)h; 5134 struct drbd_conf *mdev;
5135 struct p_block_ack *p = pi->data;
4516 struct drbd_work *w; 5136 struct drbd_work *w;
4517 sector_t sector; 5137 sector_t sector;
4518 int size; 5138 int size;
4519 5139
5140 mdev = vnr_to_mdev(tconn, pi->vnr);
5141 if (!mdev)
5142 return -EIO;
5143
4520 sector = be64_to_cpu(p->sector); 5144 sector = be64_to_cpu(p->sector);
4521 size = be32_to_cpu(p->blksize); 5145 size = be32_to_cpu(p->blksize);
4522 5146
4523 update_peer_seq(mdev, be32_to_cpu(p->seq_num)); 5147 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4524 5148
4525 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC) 5149 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
4526 drbd_ov_oos_found(mdev, sector, size); 5150 drbd_ov_out_of_sync_found(mdev, sector, size);
4527 else 5151 else
4528 ov_oos_print(mdev); 5152 ov_out_of_sync_print(mdev);
4529 5153
4530 if (!get_ldev(mdev)) 5154 if (!get_ldev(mdev))
4531 return true; 5155 return 0;
4532 5156
4533 drbd_rs_complete_io(mdev, sector); 5157 drbd_rs_complete_io(mdev, sector);
4534 dec_rs_pending(mdev); 5158 dec_rs_pending(mdev);
@@ -4543,114 +5167,137 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h)
4543 w = kmalloc(sizeof(*w), GFP_NOIO); 5167 w = kmalloc(sizeof(*w), GFP_NOIO);
4544 if (w) { 5168 if (w) {
4545 w->cb = w_ov_finished; 5169 w->cb = w_ov_finished;
4546 drbd_queue_work_front(&mdev->data.work, w); 5170 w->mdev = mdev;
5171 drbd_queue_work(&mdev->tconn->sender_work, w);
4547 } else { 5172 } else {
4548 dev_err(DEV, "kmalloc(w) failed."); 5173 dev_err(DEV, "kmalloc(w) failed.");
4549 ov_oos_print(mdev); 5174 ov_out_of_sync_print(mdev);
4550 drbd_resync_finished(mdev); 5175 drbd_resync_finished(mdev);
4551 } 5176 }
4552 } 5177 }
4553 put_ldev(mdev); 5178 put_ldev(mdev);
4554 return true; 5179 return 0;
5180}
5181
5182static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi)
5183{
5184 return 0;
4555} 5185}
4556 5186
4557static int got_skip(struct drbd_conf *mdev, struct p_header80 *h) 5187static int tconn_finish_peer_reqs(struct drbd_tconn *tconn)
4558{ 5188{
4559 return true; 5189 struct drbd_conf *mdev;
5190 int vnr, not_empty = 0;
5191
5192 do {
5193 clear_bit(SIGNAL_ASENDER, &tconn->flags);
5194 flush_signals(current);
5195
5196 rcu_read_lock();
5197 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
5198 kref_get(&mdev->kref);
5199 rcu_read_unlock();
5200 if (drbd_finish_peer_reqs(mdev)) {
5201 kref_put(&mdev->kref, &drbd_minor_destroy);
5202 return 1;
5203 }
5204 kref_put(&mdev->kref, &drbd_minor_destroy);
5205 rcu_read_lock();
5206 }
5207 set_bit(SIGNAL_ASENDER, &tconn->flags);
5208
5209 spin_lock_irq(&tconn->req_lock);
5210 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
5211 not_empty = !list_empty(&mdev->done_ee);
5212 if (not_empty)
5213 break;
5214 }
5215 spin_unlock_irq(&tconn->req_lock);
5216 rcu_read_unlock();
5217 } while (not_empty);
5218
5219 return 0;
4560} 5220}
4561 5221
4562struct asender_cmd { 5222struct asender_cmd {
4563 size_t pkt_size; 5223 size_t pkt_size;
4564 int (*process)(struct drbd_conf *mdev, struct p_header80 *h); 5224 int (*fn)(struct drbd_tconn *tconn, struct packet_info *);
4565}; 5225};
4566 5226
4567static struct asender_cmd *get_asender_cmd(int cmd) 5227static struct asender_cmd asender_tbl[] = {
4568{ 5228 [P_PING] = { 0, got_Ping },
4569 static struct asender_cmd asender_tbl[] = { 5229 [P_PING_ACK] = { 0, got_PingAck },
4570 /* anything missing from this table is in
4571 * the drbd_cmd_handler (drbd_default_handler) table,
4572 * see the beginning of drbdd() */
4573 [P_PING] = { sizeof(struct p_header80), got_Ping },
4574 [P_PING_ACK] = { sizeof(struct p_header80), got_PingAck },
4575 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 5230 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4576 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 5231 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4577 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 5232 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4578 [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 5233 [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck },
4579 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, 5234 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
4580 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, 5235 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
4581 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply}, 5236 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
4582 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult }, 5237 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
4583 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, 5238 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4584 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, 5239 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4585 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, 5240 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
4586 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, 5241 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
4587 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply}, 5242 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
4588 [P_MAX_CMD] = { 0, NULL }, 5243 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
4589 }; 5244 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
4590 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) 5245};
4591 return NULL;
4592 return &asender_tbl[cmd];
4593}
4594 5246
4595int drbd_asender(struct drbd_thread *thi) 5247int drbd_asender(struct drbd_thread *thi)
4596{ 5248{
4597 struct drbd_conf *mdev = thi->mdev; 5249 struct drbd_tconn *tconn = thi->tconn;
4598 struct p_header80 *h = &mdev->meta.rbuf.header.h80;
4599 struct asender_cmd *cmd = NULL; 5250 struct asender_cmd *cmd = NULL;
4600 5251 struct packet_info pi;
4601 int rv, len; 5252 int rv;
4602 void *buf = h; 5253 void *buf = tconn->meta.rbuf;
4603 int received = 0; 5254 int received = 0;
4604 int expect = sizeof(struct p_header80); 5255 unsigned int header_size = drbd_header_size(tconn);
4605 int empty; 5256 int expect = header_size;
4606 int ping_timeout_active = 0; 5257 bool ping_timeout_active = false;
4607 5258 struct net_conf *nc;
4608 sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); 5259 int ping_timeo, tcp_cork, ping_int;
4609 5260
4610 current->policy = SCHED_RR; /* Make this a realtime task! */ 5261 current->policy = SCHED_RR; /* Make this a realtime task! */
4611 current->rt_priority = 2; /* more important than all other tasks */ 5262 current->rt_priority = 2; /* more important than all other tasks */
4612 5263
4613 while (get_t_state(thi) == Running) { 5264 while (get_t_state(thi) == RUNNING) {
4614 drbd_thread_current_set_cpu(mdev); 5265 drbd_thread_current_set_cpu(thi);
4615 if (test_and_clear_bit(SEND_PING, &mdev->flags)) {
4616 ERR_IF(!drbd_send_ping(mdev)) goto reconnect;
4617 mdev->meta.socket->sk->sk_rcvtimeo =
4618 mdev->net_conf->ping_timeo*HZ/10;
4619 ping_timeout_active = 1;
4620 }
4621 5266
4622 /* conditionally cork; 5267 rcu_read_lock();
4623 * it may hurt latency if we cork without much to send */ 5268 nc = rcu_dereference(tconn->net_conf);
4624 if (!mdev->net_conf->no_cork && 5269 ping_timeo = nc->ping_timeo;
4625 3 < atomic_read(&mdev->unacked_cnt)) 5270 tcp_cork = nc->tcp_cork;
4626 drbd_tcp_cork(mdev->meta.socket); 5271 ping_int = nc->ping_int;
4627 while (1) { 5272 rcu_read_unlock();
4628 clear_bit(SIGNAL_ASENDER, &mdev->flags); 5273
4629 flush_signals(current); 5274 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
4630 if (!drbd_process_done_ee(mdev)) 5275 if (drbd_send_ping(tconn)) {
5276 conn_err(tconn, "drbd_send_ping has failed\n");
4631 goto reconnect; 5277 goto reconnect;
4632 /* to avoid race with newly queued ACKs */ 5278 }
4633 set_bit(SIGNAL_ASENDER, &mdev->flags); 5279 tconn->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
4634 spin_lock_irq(&mdev->req_lock); 5280 ping_timeout_active = true;
4635 empty = list_empty(&mdev->done_ee); 5281 }
4636 spin_unlock_irq(&mdev->req_lock); 5282
4637 /* new ack may have been queued right here, 5283 /* TODO: conditionally cork; it may hurt latency if we cork without
4638 * but then there is also a signal pending, 5284 much to send */
4639 * and we start over... */ 5285 if (tcp_cork)
4640 if (empty) 5286 drbd_tcp_cork(tconn->meta.socket);
4641 break; 5287 if (tconn_finish_peer_reqs(tconn)) {
5288 conn_err(tconn, "tconn_finish_peer_reqs() failed\n");
5289 goto reconnect;
4642 } 5290 }
4643 /* but unconditionally uncork unless disabled */ 5291 /* but unconditionally uncork unless disabled */
4644 if (!mdev->net_conf->no_cork) 5292 if (tcp_cork)
4645 drbd_tcp_uncork(mdev->meta.socket); 5293 drbd_tcp_uncork(tconn->meta.socket);
4646 5294
4647 /* short circuit, recv_msg would return EINTR anyways. */ 5295 /* short circuit, recv_msg would return EINTR anyways. */
4648 if (signal_pending(current)) 5296 if (signal_pending(current))
4649 continue; 5297 continue;
4650 5298
4651 rv = drbd_recv_short(mdev, mdev->meta.socket, 5299 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
4652 buf, expect-received, 0); 5300 clear_bit(SIGNAL_ASENDER, &tconn->flags);
4653 clear_bit(SIGNAL_ASENDER, &mdev->flags);
4654 5301
4655 flush_signals(current); 5302 flush_signals(current);
4656 5303
@@ -4668,80 +5315,91 @@ int drbd_asender(struct drbd_thread *thi)
4668 received += rv; 5315 received += rv;
4669 buf += rv; 5316 buf += rv;
4670 } else if (rv == 0) { 5317 } else if (rv == 0) {
4671 dev_err(DEV, "meta connection shut down by peer.\n"); 5318 if (test_bit(DISCONNECT_SENT, &tconn->flags)) {
5319 long t;
5320 rcu_read_lock();
5321 t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10;
5322 rcu_read_unlock();
5323
5324 t = wait_event_timeout(tconn->ping_wait,
5325 tconn->cstate < C_WF_REPORT_PARAMS,
5326 t);
5327 if (t)
5328 break;
5329 }
5330 conn_err(tconn, "meta connection shut down by peer.\n");
4672 goto reconnect; 5331 goto reconnect;
4673 } else if (rv == -EAGAIN) { 5332 } else if (rv == -EAGAIN) {
4674 /* If the data socket received something meanwhile, 5333 /* If the data socket received something meanwhile,
4675 * that is good enough: peer is still alive. */ 5334 * that is good enough: peer is still alive. */
4676 if (time_after(mdev->last_received, 5335 if (time_after(tconn->last_received,
4677 jiffies - mdev->meta.socket->sk->sk_rcvtimeo)) 5336 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
4678 continue; 5337 continue;
4679 if (ping_timeout_active) { 5338 if (ping_timeout_active) {
4680 dev_err(DEV, "PingAck did not arrive in time.\n"); 5339 conn_err(tconn, "PingAck did not arrive in time.\n");
4681 goto reconnect; 5340 goto reconnect;
4682 } 5341 }
4683 set_bit(SEND_PING, &mdev->flags); 5342 set_bit(SEND_PING, &tconn->flags);
4684 continue; 5343 continue;
4685 } else if (rv == -EINTR) { 5344 } else if (rv == -EINTR) {
4686 continue; 5345 continue;
4687 } else { 5346 } else {
4688 dev_err(DEV, "sock_recvmsg returned %d\n", rv); 5347 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
4689 goto reconnect; 5348 goto reconnect;
4690 } 5349 }
4691 5350
4692 if (received == expect && cmd == NULL) { 5351 if (received == expect && cmd == NULL) {
4693 if (unlikely(h->magic != BE_DRBD_MAGIC)) { 5352 if (decode_header(tconn, tconn->meta.rbuf, &pi))
4694 dev_err(DEV, "magic?? on meta m: 0x%08x c: %d l: %d\n",
4695 be32_to_cpu(h->magic),
4696 be16_to_cpu(h->command),
4697 be16_to_cpu(h->length));
4698 goto reconnect; 5353 goto reconnect;
4699 } 5354 cmd = &asender_tbl[pi.cmd];
4700 cmd = get_asender_cmd(be16_to_cpu(h->command)); 5355 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
4701 len = be16_to_cpu(h->length); 5356 conn_err(tconn, "Unexpected meta packet %s (0x%04x)\n",
4702 if (unlikely(cmd == NULL)) { 5357 cmdname(pi.cmd), pi.cmd);
4703 dev_err(DEV, "unknown command?? on meta m: 0x%08x c: %d l: %d\n",
4704 be32_to_cpu(h->magic),
4705 be16_to_cpu(h->command),
4706 be16_to_cpu(h->length));
4707 goto disconnect; 5358 goto disconnect;
4708 } 5359 }
4709 expect = cmd->pkt_size; 5360 expect = header_size + cmd->pkt_size;
4710 ERR_IF(len != expect-sizeof(struct p_header80)) 5361 if (pi.size != expect - header_size) {
5362 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
5363 pi.cmd, pi.size);
4711 goto reconnect; 5364 goto reconnect;
5365 }
4712 } 5366 }
4713 if (received == expect) { 5367 if (received == expect) {
4714 mdev->last_received = jiffies; 5368 bool err;
4715 D_ASSERT(cmd != NULL); 5369
4716 if (!cmd->process(mdev, h)) 5370 err = cmd->fn(tconn, &pi);
5371 if (err) {
5372 conn_err(tconn, "%pf failed\n", cmd->fn);
4717 goto reconnect; 5373 goto reconnect;
5374 }
5375
5376 tconn->last_received = jiffies;
4718 5377
4719 /* the idle_timeout (ping-int) 5378 if (cmd == &asender_tbl[P_PING_ACK]) {
4720 * has been restored in got_PingAck() */ 5379 /* restore idle timeout */
4721 if (cmd == get_asender_cmd(P_PING_ACK)) 5380 tconn->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
4722 ping_timeout_active = 0; 5381 ping_timeout_active = false;
5382 }
4723 5383
4724 buf = h; 5384 buf = tconn->meta.rbuf;
4725 received = 0; 5385 received = 0;
4726 expect = sizeof(struct p_header80); 5386 expect = header_size;
4727 cmd = NULL; 5387 cmd = NULL;
4728 } 5388 }
4729 } 5389 }
4730 5390
4731 if (0) { 5391 if (0) {
4732reconnect: 5392reconnect:
4733 drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); 5393 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
4734 drbd_md_sync(mdev); 5394 conn_md_sync(tconn);
4735 } 5395 }
4736 if (0) { 5396 if (0) {
4737disconnect: 5397disconnect:
4738 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 5398 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
4739 drbd_md_sync(mdev);
4740 } 5399 }
4741 clear_bit(SIGNAL_ASENDER, &mdev->flags); 5400 clear_bit(SIGNAL_ASENDER, &tconn->flags);
4742 5401
4743 D_ASSERT(mdev->state.conn < C_CONNECTED); 5402 conn_info(tconn, "asender terminated\n");
4744 dev_info(DEV, "asender terminated\n");
4745 5403
4746 return 0; 5404 return 0;
4747} 5405}