author    Linus Torvalds <torvalds@linux-foundation.org>  2010-10-22 20:03:12 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2010-10-22 20:03:12 -0400
commit    8abfc6e7a45eb74e51904bbae676fae008b11366 (patch)
tree      57d0a24558c0693e3a52e8e756616f6c72def1e9 /drivers/block/drbd/drbd_receiver.c
parent    e9dd2b6837e26fe202708cce5ea4bb4ee3e3482e (diff)
parent    6362beea8914cbd4630ccde3617d944aeca2d48f (diff)
Merge branch 'for-2.6.37/drivers' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.37/drivers' of git://git.kernel.dk/linux-2.6-block: (95 commits)
  cciss: fix PCI IDs for new Smart Array controllers
  drbd: add race-breaker to drbd_go_diskless
  drbd: use dynamic_dev_dbg to optionally log uuid changes
  dynamic_debug.h: Fix dynamic_dev_dbg() macro if CONFIG_DYNAMIC_DEBUG not set
  drbd: cleanup: change "<= 0" to "== 0"
  drbd: relax the grace period of the md_sync timer again
  drbd: add some more explicit drbd_md_sync
  drbd: drop wrong debug asserts, fix recently introduced race
  drbd: cleanup useless leftover warn/error printk's
  drbd: add explicit drbd_md_sync to drbd_resync_finished
  drbd: Do not log an ASSERT for P_OV_REQUEST packets while C_CONNECTED
  drbd: fix for possible deadlock on IO error during resync
  drbd: fix unlikely access after free and list corruption
  drbd: fix for spurious fullsync (uuids rotated too fast)
  drbd: allow for explicit resync-finished notifications
  drbd: preparation commit, using full state in receive_state()
  drbd: drbd_send_ack_dp must not rely on header information
  drbd: Fix regression in recv_bm_rle_bits (compressed bitmap)
  drbd: Fixed a stupid copy and paste error
  drbd: Allow larger values for c-fill-target.
  ...

Fix up trivial conflict in drivers/block/ataflop.c due to BKL removal
Diffstat (limited to 'drivers/block/drbd/drbd_receiver.c')
-rw-r--r--  drivers/block/drbd/drbd_receiver.c | 946
1 file changed, 537 insertions(+), 409 deletions(-)
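Much of the churn below comes from the new dual wire-header format: protocol 95 introduces a "big" header whose length field is 32 bits wide, and drbd_recv_header() is rewritten to dispatch on the magic value. As a rough sketch of the two layouts — field widths inferred from the be16_to_cpu()/be32_to_cpu() conversions in that hunk, since the authoritative definitions live in drbd_int.h, outside this diff:

    /* sketch only -- widths inferred from the receive path below */
    struct p_header80 {
            u32 magic;          /* BE_DRBD_MAGIC */
            u16 command;
            u16 length;         /* payload bytes; caps a packet at 64 KiB */
            u8  payload[0];
    };
    struct p_header95 {
            u16 magic;          /* BE_DRBD_MAGIC_BIG */
            u16 command;
            u32 length;         /* 32-bit length lifts the 64 KiB cap */
            u8  payload[0];
    };
    union p_header {
            struct p_header80 h80;
            struct p_header95 h95;
    };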
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 484ecbb6b772..760ae0df9251 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -241,7 +241,7 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
 	spin_unlock_irq(&mdev->req_lock);
 
 	list_for_each_entry_safe(e, t, &reclaimed, w.list)
-		drbd_free_ee(mdev, e);
+		drbd_free_net_ee(mdev, e);
 }
 
 /**
@@ -298,9 +298,11 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool
  * Is also used from inside an other spin_lock_irq(&mdev->req_lock);
  * Either links the page chain back to the global pool,
  * or returns all pages to the system. */
-static void drbd_pp_free(struct drbd_conf *mdev, struct page *page)
+static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
 {
+	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
 	int i;
+
 	if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count)
 		i = page_chain_free(page);
 	else {
@@ -311,10 +313,10 @@ static void drbd_pp_free(struct drbd_conf *mdev, struct page *page)
 		drbd_pp_vacant += i;
 		spin_unlock(&drbd_pp_lock);
 	}
-	atomic_sub(i, &mdev->pp_in_use);
-	i = atomic_read(&mdev->pp_in_use);
+	i = atomic_sub_return(i, a);
 	if (i < 0)
-		dev_warn(DEV, "ASSERTION FAILED: pp_in_use: %d < 0\n", i);
+		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
+			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
 	wake_up(&drbd_pp_wait);
 }
 
@@ -365,7 +367,6 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
 	e->size = data_size;
 	e->flags = 0;
 	e->sector = sector;
-	e->sector = sector;
 	e->block_id = id;
 
 	return e;
@@ -375,9 +376,11 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
 	return NULL;
 }
 
-void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
+void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int is_net)
 {
-	drbd_pp_free(mdev, e->pages);
+	if (e->flags & EE_HAS_DIGEST)
+		kfree(e->digest);
+	drbd_pp_free(mdev, e->pages, is_net);
 	D_ASSERT(atomic_read(&e->pending_bios) == 0);
 	D_ASSERT(hlist_unhashed(&e->colision));
 	mempool_free(e, drbd_ee_mempool);
@@ -388,13 +391,14 @@ int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
 	LIST_HEAD(work_list);
 	struct drbd_epoch_entry *e, *t;
 	int count = 0;
+	int is_net = list == &mdev->net_ee;
 
 	spin_lock_irq(&mdev->req_lock);
 	list_splice_init(list, &work_list);
 	spin_unlock_irq(&mdev->req_lock);
 
 	list_for_each_entry_safe(e, t, &work_list, w.list) {
-		drbd_free_ee(mdev, e);
+		drbd_free_some_ee(mdev, e, is_net);
 		count++;
 	}
 	return count;
@@ -423,7 +427,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev)
 	spin_unlock_irq(&mdev->req_lock);
 
 	list_for_each_entry_safe(e, t, &reclaimed, w.list)
-		drbd_free_ee(mdev, e);
+		drbd_free_net_ee(mdev, e);
 
 	/* possible callbacks here:
 	 * e_end_block, and e_end_resync_block, e_send_discard_ack.
@@ -719,14 +723,14 @@ out:
 static int drbd_send_fp(struct drbd_conf *mdev,
 	struct socket *sock, enum drbd_packets cmd)
 {
-	struct p_header *h = (struct p_header *) &mdev->data.sbuf.header;
+	struct p_header80 *h = &mdev->data.sbuf.header.h80;
 
 	return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0);
 }
 
 static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock)
 {
-	struct p_header *h = (struct p_header *) &mdev->data.sbuf.header;
+	struct p_header80 *h = &mdev->data.rbuf.header.h80;
 	int rr;
 
 	rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0);
@@ -776,9 +780,6 @@ static int drbd_connect(struct drbd_conf *mdev)
 
 	D_ASSERT(!mdev->data.socket);
 
-	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags))
-		dev_err(DEV, "CREATE_BARRIER flag was set in drbd_connect - now cleared!\n");
-
 	if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
 		return -2;
 
@@ -927,6 +928,11 @@ retry:
 
 	drbd_thread_start(&mdev->asender);
 
+	if (mdev->agreed_pro_version < 95 && get_ldev(mdev)) {
+		drbd_setup_queue_param(mdev, DRBD_MAX_SIZE_H80_PACKET);
+		put_ldev(mdev);
+	}
+
 	if (!drbd_send_protocol(mdev))
 		return -1;
 	drbd_send_sync_param(mdev, &mdev->sync_conf);
@@ -946,22 +952,28 @@ out_release_sockets:
 	return -1;
 }
 
-static int drbd_recv_header(struct drbd_conf *mdev, struct p_header *h)
+static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size)
 {
+	union p_header *h = &mdev->data.rbuf.header;
 	int r;
 
 	r = drbd_recv(mdev, h, sizeof(*h));
-
 	if (unlikely(r != sizeof(*h))) {
 		dev_err(DEV, "short read expecting header on sock: r=%d\n", r);
 		return FALSE;
-	};
-	h->command = be16_to_cpu(h->command);
-	h->length  = be16_to_cpu(h->length);
-	if (unlikely(h->magic != BE_DRBD_MAGIC)) {
-		dev_err(DEV, "magic?? on data m: 0x%lx c: %d l: %d\n",
-		    (long)be32_to_cpu(h->magic),
-		    h->command, h->length);
+	}
+
+	if (likely(h->h80.magic == BE_DRBD_MAGIC)) {
+		*cmd = be16_to_cpu(h->h80.command);
+		*packet_size = be16_to_cpu(h->h80.length);
+	} else if (h->h95.magic == BE_DRBD_MAGIC_BIG) {
+		*cmd = be16_to_cpu(h->h95.command);
+		*packet_size = be32_to_cpu(h->h95.length);
+	} else {
+		dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n",
+		    be32_to_cpu(h->h80.magic),
+		    be16_to_cpu(h->h80.command),
+		    be16_to_cpu(h->h80.length));
 		return FALSE;
 	}
 	mdev->last_received = jiffies;
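After this rewrite, drbd_recv_header() returns the decoded command and payload size through out-parameters instead of exposing the raw header to callers. A minimal sketch of the new calling convention, modeled on the receive_bitmap() and drbdd() hunks further down:

    enum drbd_packets cmd;
    unsigned int packet_size;

    if (!drbd_recv_header(mdev, &cmd, &packet_size))
            return FALSE;
    /* cmd and packet_size are already converted to host byte order;
     * any sub-header is read into mdev->data.rbuf by the per-command
     * handler. */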
@@ -1268,17 +1280,12 @@ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __relea
 	return 1;
 }
 
-static int receive_Barrier(struct drbd_conf *mdev, struct p_header *h)
+static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
 	int rv, issue_flush;
-	struct p_barrier *p = (struct p_barrier *)h;
+	struct p_barrier *p = &mdev->data.rbuf.barrier;
 	struct drbd_epoch *epoch;
 
-	ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
-
-	rv = drbd_recv(mdev, h->payload, h->length);
-	ERR_IF(rv != h->length) return FALSE;
-
 	inc_unacked(mdev);
 
 	if (mdev->net_conf->wire_protocol != DRBD_PROT_C)
@@ -1457,7 +1464,7 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
 		data_size -= rr;
 	}
 	kunmap(page);
-	drbd_pp_free(mdev, page);
+	drbd_pp_free(mdev, page, 0);
 	return rv;
 }
 
@@ -1562,30 +1569,29 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si
 	list_add(&e->w.list, &mdev->sync_ee);
 	spin_unlock_irq(&mdev->req_lock);
 
+	atomic_add(data_size >> 9, &mdev->rs_sect_ev);
 	if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0)
 		return TRUE;
 
+	/* drbd_submit_ee currently fails for one reason only:
+	 * not being able to allocate enough bios.
+	 * Is dropping the connection going to help? */
+	spin_lock_irq(&mdev->req_lock);
+	list_del(&e->w.list);
+	spin_unlock_irq(&mdev->req_lock);
+
 	drbd_free_ee(mdev, e);
 fail:
 	put_ldev(mdev);
 	return FALSE;
 }
 
-static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h)
+static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
 	struct drbd_request *req;
 	sector_t sector;
-	unsigned int header_size, data_size;
 	int ok;
-	struct p_data *p = (struct p_data *)h;
-
-	header_size = sizeof(*p) - sizeof(*h);
-	data_size   = h->length - header_size;
-
-	ERR_IF(data_size == 0) return FALSE;
-
-	if (drbd_recv(mdev, h->payload, header_size) != header_size)
-		return FALSE;
+	struct p_data *p = &mdev->data.rbuf.data;
 
 	sector = be64_to_cpu(p->sector);
 
@@ -1611,20 +1617,11 @@ static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h)
 	return ok;
 }
 
-static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h)
+static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
 	sector_t sector;
-	unsigned int header_size, data_size;
 	int ok;
-	struct p_data *p = (struct p_data *)h;
-
-	header_size = sizeof(*p) - sizeof(*h);
-	data_size   = h->length - header_size;
-
-	ERR_IF(data_size == 0) return FALSE;
-
-	if (drbd_recv(mdev, h->payload, header_size) != header_size)
-		return FALSE;
+	struct p_data *p = &mdev->data.rbuf.data;
 
 	sector = be64_to_cpu(p->sector);
 	D_ASSERT(p->block_id == ID_SYNCER);
@@ -1640,9 +1637,11 @@ static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h)
 
 		ok = drbd_drain_block(mdev, data_size);
 
-		drbd_send_ack_dp(mdev, P_NEG_ACK, p);
+		drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
 	}
 
+	atomic_add(data_size >> 9, &mdev->rs_sect_in);
+
 	return ok;
 }
 
@@ -1765,24 +1764,27 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
 	return ret;
 }
 
+static unsigned long write_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
+{
+	if (mdev->agreed_pro_version >= 95)
+		return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
+			(dpf & DP_UNPLUG ? REQ_UNPLUG : 0) |
+			(dpf & DP_FUA ? REQ_FUA : 0) |
+			(dpf & DP_FLUSH ? REQ_FUA : 0) |
+			(dpf & DP_DISCARD ? REQ_DISCARD : 0);
+	else
+		return dpf & DP_RW_SYNC ? (REQ_SYNC | REQ_UNPLUG) : 0;
+}
+
 /* mirrored write */
-static int receive_Data(struct drbd_conf *mdev, struct p_header *h)
+static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
 	sector_t sector;
 	struct drbd_epoch_entry *e;
-	struct p_data *p = (struct p_data *)h;
-	int header_size, data_size;
+	struct p_data *p = &mdev->data.rbuf.data;
 	int rw = WRITE;
 	u32 dp_flags;
 
-	header_size = sizeof(*p) - sizeof(*h);
-	data_size   = h->length - header_size;
-
-	ERR_IF(data_size == 0) return FALSE;
-
-	if (drbd_recv(mdev, h->payload, header_size) != header_size)
-		return FALSE;
-
 	if (!get_ldev(mdev)) {
 		if (__ratelimit(&drbd_ratelimit_state))
 			dev_err(DEV, "Can not write mirrored data block "
@@ -1792,7 +1794,7 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h)
 		mdev->peer_seq++;
 		spin_unlock(&mdev->peer_seq_lock);
 
-		drbd_send_ack_dp(mdev, P_NEG_ACK, p);
+		drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
 		atomic_inc(&mdev->current_epoch->epoch_size);
 		return drbd_drain_block(mdev, data_size);
 	}
@@ -1839,12 +1841,8 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h)
 	spin_unlock(&mdev->epoch_lock);
 
 	dp_flags = be32_to_cpu(p->dp_flags);
-	if (dp_flags & DP_HARDBARRIER) {
-		dev_err(DEV, "ASSERT FAILED would have submitted barrier request\n");
-		/* rw |= REQ_HARDBARRIER; */
-	}
-	if (dp_flags & DP_RW_SYNC)
-		rw |= REQ_SYNC | REQ_UNPLUG;
+	rw |= write_flags_to_bio(mdev, dp_flags);
+
 	if (dp_flags & DP_MAY_SET_IN_SYNC)
 		e->flags |= EE_MAY_SET_IN_SYNC;
 
@@ -2007,6 +2005,16 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h)
 	if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0)
 		return TRUE;
 
+	/* drbd_submit_ee currently fails for one reason only:
+	 * not being able to allocate enough bios.
+	 * Is dropping the connection going to help? */
+	spin_lock_irq(&mdev->req_lock);
+	list_del(&e->w.list);
+	hlist_del_init(&e->colision);
+	spin_unlock_irq(&mdev->req_lock);
+	if (e->flags & EE_CALL_AL_COMPLETE_IO)
+		drbd_al_complete_io(mdev, e->sector);
+
 out_interrupted:
 	/* yes, the epoch_size now is imbalanced.
 	 * but we drop the connection anyways, so we don't have a chance to
@@ -2016,20 +2024,64 @@ out_interrupted:
 	return FALSE;
 }
 
-static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
+/* We may throttle resync, if the lower device seems to be busy,
+ * and current sync rate is above c_min_rate.
+ *
+ * To decide whether or not the lower device is busy, we use a scheme similar
+ * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
+ * (more than 64 sectors) of activity we cannot account for with our own resync
+ * activity, it obviously is "busy".
+ *
+ * The current sync rate used here uses only the most recent two step marks,
+ * to have a short time average so we can react faster.
+ */
+int drbd_rs_should_slow_down(struct drbd_conf *mdev)
+{
+	struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
+	unsigned long db, dt, dbdt;
+	int curr_events;
+	int throttle = 0;
+
+	/* feature disabled? */
+	if (mdev->sync_conf.c_min_rate == 0)
+		return 0;
+
+	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
+		      (int)part_stat_read(&disk->part0, sectors[1]) -
+			atomic_read(&mdev->rs_sect_ev);
+	if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
+		unsigned long rs_left;
+		int i;
+
+		mdev->rs_last_events = curr_events;
+
+		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
+		 * approx. */
+		i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-2) % DRBD_SYNC_MARKS;
+		rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
+
+		dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
+		if (!dt)
+			dt++;
+		db = mdev->rs_mark_left[i] - rs_left;
+		dbdt = Bit2KB(db/dt);
+
+		if (dbdt > mdev->sync_conf.c_min_rate)
+			throttle = 1;
+	}
+	return throttle;
+}
+
+
+static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int digest_size)
 {
 	sector_t sector;
 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
 	struct drbd_epoch_entry *e;
 	struct digest_info *di = NULL;
-	int size, digest_size;
+	int size, verb;
 	unsigned int fault_type;
-	struct p_block_req *p =
-		(struct p_block_req *)h;
-	const int brps = sizeof(*p)-sizeof(*h);
-
-	if (drbd_recv(mdev, h->payload, brps) != brps)
-		return FALSE;
+	struct p_block_req *p = &mdev->data.rbuf.block_req;
 
 	sector = be64_to_cpu(p->sector);
 	size   = be32_to_cpu(p->blksize);
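The drbd_rs_should_slow_down() arithmetic added above is easy to check by hand. The numbers here are made up, and assume the usual 4 KiB bitmap granularity, i.e. Bit2KB(x) == x * 4:

    /* hypothetical sample, not from the patch:
     *   dt   = 3 seconds since the older of the two sync marks
     *   db   = 4096 bits brought in sync during that window
     *   dbdt = Bit2KB(4096 / 3) = 1365 * 4 = 5460 KiB/s
     * with c_min_rate = 4000 KiB/s this yields throttle = 1: the node is
     * resyncing above the configured floor while the backing device also
     * shows more than 64 sectors of IO it cannot attribute to resync. */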
@@ -2046,12 +2098,31 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
 	}
 
 	if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
-		if (__ratelimit(&drbd_ratelimit_state))
+		verb = 1;
+		switch (cmd) {
+		case P_DATA_REQUEST:
+			drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
+			break;
+		case P_RS_DATA_REQUEST:
+		case P_CSUM_RS_REQUEST:
+		case P_OV_REQUEST:
+			drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
+			break;
+		case P_OV_REPLY:
+			verb = 0;
+			dec_rs_pending(mdev);
+			drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
+			break;
+		default:
+			dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
+				cmdname(cmd));
+		}
+		if (verb && __ratelimit(&drbd_ratelimit_state))
 			dev_err(DEV, "Can not satisfy peer's read request, "
 			    "no local data.\n");
-		drbd_send_ack_rp(mdev, h->command == P_DATA_REQUEST ? P_NEG_DREPLY :
-				 P_NEG_RS_DREPLY , p);
-		return drbd_drain_block(mdev, h->length - brps);
+
+		/* drain possibly payload */
+		return drbd_drain_block(mdev, digest_size);
 	}
 
 	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
@@ -2063,31 +2134,21 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
 		return FALSE;
 	}
 
-	switch (h->command) {
+	switch (cmd) {
 	case P_DATA_REQUEST:
 		e->w.cb = w_e_end_data_req;
 		fault_type = DRBD_FAULT_DT_RD;
-		break;
+		/* application IO, don't drbd_rs_begin_io */
+		goto submit;
+
 	case P_RS_DATA_REQUEST:
 		e->w.cb = w_e_end_rsdata_req;
 		fault_type = DRBD_FAULT_RS_RD;
-		/* Eventually this should become asynchronously. Currently it
-		 * blocks the whole receiver just to delay the reading of a
-		 * resync data block.
-		 * the drbd_work_queue mechanism is made for this...
-		 */
-		if (!drbd_rs_begin_io(mdev, sector)) {
-			/* we have been interrupted,
-			 * probably connection lost! */
-			D_ASSERT(signal_pending(current));
-			goto out_free_e;
-		}
 		break;
 
 	case P_OV_REPLY:
 	case P_CSUM_RS_REQUEST:
 		fault_type = DRBD_FAULT_RS_RD;
-		digest_size = h->length - brps ;
 		di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
 		if (!di)
 			goto out_free_e;
@@ -2095,31 +2156,25 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
 		di->digest_size = digest_size;
 		di->digest = (((char *)di)+sizeof(struct digest_info));
 
+		e->digest = di;
+		e->flags |= EE_HAS_DIGEST;
+
 		if (drbd_recv(mdev, di->digest, digest_size) != digest_size)
 			goto out_free_e;
 
-		e->block_id = (u64)(unsigned long)di;
-		if (h->command == P_CSUM_RS_REQUEST) {
+		if (cmd == P_CSUM_RS_REQUEST) {
 			D_ASSERT(mdev->agreed_pro_version >= 89);
 			e->w.cb = w_e_end_csum_rs_req;
-		} else if (h->command == P_OV_REPLY) {
+		} else if (cmd == P_OV_REPLY) {
 			e->w.cb = w_e_end_ov_reply;
 			dec_rs_pending(mdev);
-			break;
-		}
-
-		if (!drbd_rs_begin_io(mdev, sector)) {
-			/* we have been interrupted, probably connection lost! */
-			D_ASSERT(signal_pending(current));
-			goto out_free_e;
+			/* drbd_rs_begin_io done when we sent this request,
+			 * but accounting still needs to be done. */
+			goto submit_for_resync;
 		}
 		break;
 
 	case P_OV_REQUEST:
-		if (mdev->state.conn >= C_CONNECTED &&
-		    mdev->state.conn != C_VERIFY_T)
-			dev_warn(DEV, "ASSERT FAILED: got P_OV_REQUEST while being %s\n",
-				drbd_conn_str(mdev->state.conn));
 		if (mdev->ov_start_sector == ~(sector_t)0 &&
 		    mdev->agreed_pro_version >= 90) {
 			mdev->ov_start_sector = sector;
@@ -2130,37 +2185,63 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
 		}
 		e->w.cb = w_e_end_ov_req;
 		fault_type = DRBD_FAULT_RS_RD;
-		/* Eventually this should become asynchronous. Currently it
-		 * blocks the whole receiver just to delay the reading of a
-		 * resync data block.
-		 * the drbd_work_queue mechanism is made for this...
-		 */
-		if (!drbd_rs_begin_io(mdev, sector)) {
-			/* we have been interrupted,
-			 * probably connection lost! */
-			D_ASSERT(signal_pending(current));
-			goto out_free_e;
-		}
 		break;
 
-
 	default:
 		dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
-			cmdname(h->command));
+			cmdname(cmd));
 		fault_type = DRBD_FAULT_MAX;
+		goto out_free_e;
 	}
 
-	spin_lock_irq(&mdev->req_lock);
-	list_add(&e->w.list, &mdev->read_ee);
-	spin_unlock_irq(&mdev->req_lock);
+	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
+	 * wrt the receiver, but it is not as straightforward as it may seem.
+	 * Various places in the resync start and stop logic assume resync
+	 * requests are processed in order, requeuing this on the worker thread
+	 * introduces a bunch of new code for synchronization between threads.
+	 *
+	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
+	 * "forever", throttling after drbd_rs_begin_io will lock that extent
+	 * for application writes for the same time. For now, just throttle
+	 * here, where the rest of the code expects the receiver to sleep for
+	 * a while, anyways.
+	 */
+
+	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
+	 * this defers syncer requests for some time, before letting at least
+	 * on request through. The resync controller on the receiving side
+	 * will adapt to the incoming rate accordingly.
+	 *
+	 * We cannot throttle here if remote is Primary/SyncTarget:
+	 * we would also throttle its application reads.
+	 * In that case, throttling is done on the SyncTarget only.
+	 */
+	if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev))
+		msleep(100);
+	if (drbd_rs_begin_io(mdev, e->sector))
+		goto out_free_e;
 
+submit_for_resync:
+	atomic_add(size >> 9, &mdev->rs_sect_ev);
+
+submit:
 	inc_unacked(mdev);
+	spin_lock_irq(&mdev->req_lock);
+	list_add_tail(&e->w.list, &mdev->read_ee);
+	spin_unlock_irq(&mdev->req_lock);
 
 	if (drbd_submit_ee(mdev, e, READ, fault_type) == 0)
 		return TRUE;
 
+	/* drbd_submit_ee currently fails for one reason only:
+	 * not being able to allocate enough bios.
+	 * Is dropping the connection going to help? */
+	spin_lock_irq(&mdev->req_lock);
+	list_del(&e->w.list);
+	spin_unlock_irq(&mdev->req_lock);
+	/* no drbd_rs_complete_io(), we are dropping the connection anyways */
+
 out_free_e:
-	kfree(di);
 	put_ldev(mdev);
 	drbd_free_ee(mdev, e);
 	return FALSE;
@@ -2699,20 +2780,13 @@ static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
 		return 1;
 }
 
-static int receive_protocol(struct drbd_conf *mdev, struct p_header *h)
+static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
-	struct p_protocol *p = (struct p_protocol *)h;
-	int header_size, data_size;
+	struct p_protocol *p = &mdev->data.rbuf.protocol;
 	int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
 	int p_want_lose, p_two_primaries, cf;
 	char p_integrity_alg[SHARED_SECRET_MAX] = "";
 
-	header_size = sizeof(*p) - sizeof(*h);
-	data_size   = h->length - header_size;
-
-	if (drbd_recv(mdev, h->payload, header_size) != header_size)
-		return FALSE;
-
 	p_proto = be32_to_cpu(p->protocol);
 	p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
 	p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
@@ -2805,39 +2879,46 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
 	return tfm;
 }
 
-static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
+static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size)
 {
 	int ok = TRUE;
-	struct p_rs_param_89 *p = (struct p_rs_param_89 *)h;
+	struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95;
 	unsigned int header_size, data_size, exp_max_sz;
 	struct crypto_hash *verify_tfm = NULL;
 	struct crypto_hash *csums_tfm = NULL;
 	const int apv = mdev->agreed_pro_version;
+	int *rs_plan_s = NULL;
+	int fifo_size = 0;
 
 	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
 		    : apv == 88 ? sizeof(struct p_rs_param)
 			+ SHARED_SECRET_MAX
-		    : /* 89 */    sizeof(struct p_rs_param_89);
+		    : apv <= 94 ? sizeof(struct p_rs_param_89)
+		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);
 
-	if (h->length > exp_max_sz) {
+	if (packet_size > exp_max_sz) {
 		dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
-		    h->length, exp_max_sz);
+		    packet_size, exp_max_sz);
 		return FALSE;
 	}
 
 	if (apv <= 88) {
-		header_size = sizeof(struct p_rs_param) - sizeof(*h);
-		data_size   = h->length - header_size;
-	} else /* apv >= 89 */ {
-		header_size = sizeof(struct p_rs_param_89) - sizeof(*h);
-		data_size   = h->length - header_size;
+		header_size = sizeof(struct p_rs_param) - sizeof(struct p_header80);
+		data_size   = packet_size - header_size;
+	} else if (apv <= 94) {
+		header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header80);
+		data_size   = packet_size - header_size;
+		D_ASSERT(data_size == 0);
+	} else {
+		header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header80);
+		data_size   = packet_size - header_size;
 		D_ASSERT(data_size == 0);
 	}
 
 	/* initialize verify_alg and csums_alg */
 	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
 
-	if (drbd_recv(mdev, h->payload, header_size) != header_size)
+	if (drbd_recv(mdev, &p->head.payload, header_size) != header_size)
 		return FALSE;
 
 	mdev->sync_conf.rate = be32_to_cpu(p->rate);
@@ -2896,6 +2977,22 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
 			}
 		}
 
+		if (apv > 94) {
+			mdev->sync_conf.rate = be32_to_cpu(p->rate);
+			mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
+			mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
+			mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
+			mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
+
+			fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
+			if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
+				rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
+				if (!rs_plan_s) {
+					dev_err(DEV, "kmalloc of fifo_buffer failed");
+					goto disconnect;
+				}
+			}
+		}
 
 		spin_lock(&mdev->peer_seq_lock);
 		/* lock against drbd_nl_syncer_conf() */
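One detail in the rs_plan hunk above: c_plan_ahead is configured in tenths of a second, while the FIFO is consumed once per SLEEP_TIME jiffies by the resync controller. Assuming SLEEP_TIME is HZ/10 (the 100 ms tick defined in drbd_int.h, not visible in this diff), the expression collapses to one slot per controller tick:

    /* fifo_size = (c_plan_ahead * 10 * SLEEP_TIME) / HZ
     * e.g. c_plan_ahead = 20          (a 2.0 s planning horizon)
     *      fifo_size    = (20 * 10 * (HZ / 10)) / HZ = 20 slots */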
@@ -2913,6 +3010,12 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
 			mdev->csums_tfm = csums_tfm;
 			dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
 		}
+		if (fifo_size != mdev->rs_plan_s.size) {
+			kfree(mdev->rs_plan_s.values);
+			mdev->rs_plan_s.values = rs_plan_s;
+			mdev->rs_plan_s.size = fifo_size;
+			mdev->rs_planed = 0;
+		}
 		spin_unlock(&mdev->peer_seq_lock);
 	}
 
@@ -2946,19 +3049,15 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev,
 		     (unsigned long long)a, (unsigned long long)b);
 }
 
-static int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
+static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
-	struct p_sizes *p = (struct p_sizes *)h;
+	struct p_sizes *p = &mdev->data.rbuf.sizes;
 	enum determine_dev_size dd = unchanged;
 	unsigned int max_seg_s;
 	sector_t p_size, p_usize, my_usize;
 	int ldsc = 0; /* local disk size changed */
 	enum dds_flags ddsf;
 
-	ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
-	if (drbd_recv(mdev, h->payload, h->length) != h->length)
-		return FALSE;
-
 	p_size = be64_to_cpu(p->d_size);
 	p_usize = be64_to_cpu(p->u_size);
 
@@ -3028,6 +3127,8 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
 
 	if (mdev->agreed_pro_version < 94)
 		max_seg_s = be32_to_cpu(p->max_segment_size);
+	else if (mdev->agreed_pro_version == 94)
+		max_seg_s = DRBD_MAX_SIZE_H80_PACKET;
 	else /* drbd 8.3.8 onwards */
 		max_seg_s = DRBD_MAX_SEGMENT_SIZE;
 
@@ -3061,16 +3162,12 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
 	return TRUE;
 }
 
-static int receive_uuids(struct drbd_conf *mdev, struct p_header *h)
+static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
-	struct p_uuids *p = (struct p_uuids *)h;
+	struct p_uuids *p = &mdev->data.rbuf.uuids;
 	u64 *p_uuid;
 	int i;
 
-	ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
-	if (drbd_recv(mdev, h->payload, h->length) != h->length)
-		return FALSE;
-
 	p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
 
 	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
@@ -3106,6 +3203,11 @@ static int receive_uuids(struct drbd_conf *mdev, struct p_header *h)
 			drbd_md_sync(mdev);
 		}
 		put_ldev(mdev);
+	} else if (mdev->state.disk < D_INCONSISTENT &&
+		   mdev->state.role == R_PRIMARY) {
+		/* I am a diskless primary, the peer just created a new current UUID
+		   for me. */
+		drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
 	}
 
 	/* Before we test for the disk state, we should wait until an eventually
@@ -3149,16 +3251,12 @@ static union drbd_state convert_state(union drbd_state ps)
 	return ms;
 }
 
-static int receive_req_state(struct drbd_conf *mdev, struct p_header *h)
+static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
-	struct p_req_state *p = (struct p_req_state *)h;
+	struct p_req_state *p = &mdev->data.rbuf.req_state;
 	union drbd_state mask, val;
 	int rv;
 
-	ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
-	if (drbd_recv(mdev, h->payload, h->length) != h->length)
-		return FALSE;
-
 	mask.i = be32_to_cpu(p->mask);
 	val.i = be32_to_cpu(p->val);
 
@@ -3179,20 +3277,14 @@ static int receive_req_state(struct drbd_conf *mdev, struct p_header *h)
 	return TRUE;
 }
 
-static int receive_state(struct drbd_conf *mdev, struct p_header *h)
+static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
-	struct p_state *p = (struct p_state *)h;
-	enum drbd_conns nconn, oconn;
-	union drbd_state ns, peer_state;
+	struct p_state *p = &mdev->data.rbuf.state;
+	union drbd_state os, ns, peer_state;
 	enum drbd_disk_state real_peer_disk;
+	enum chg_state_flags cs_flags;
 	int rv;
 
-	ERR_IF(h->length != (sizeof(*p)-sizeof(*h)))
-		return FALSE;
-
-	if (drbd_recv(mdev, h->payload, h->length) != h->length)
-		return FALSE;
-
 	peer_state.i = be32_to_cpu(p->state);
 
 	real_peer_disk = peer_state.disk;
@@ -3203,38 +3295,72 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h)
 
 	spin_lock_irq(&mdev->req_lock);
 retry:
-	oconn = nconn = mdev->state.conn;
+	os = ns = mdev->state;
 	spin_unlock_irq(&mdev->req_lock);
 
-	if (nconn == C_WF_REPORT_PARAMS)
-		nconn = C_CONNECTED;
+	/* peer says his disk is uptodate, while we think it is inconsistent,
+	 * and this happens while we think we have a sync going on. */
+	if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
+	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
+		/* If we are (becoming) SyncSource, but peer is still in sync
+		 * preparation, ignore its uptodate-ness to avoid flapping, it
+		 * will change to inconsistent once the peer reaches active
+		 * syncing states.
+		 * It may have changed syncer-paused flags, however, so we
+		 * cannot ignore this completely. */
+		if (peer_state.conn > C_CONNECTED &&
+		    peer_state.conn < C_SYNC_SOURCE)
+			real_peer_disk = D_INCONSISTENT;
+
+		/* if peer_state changes to connected at the same time,
+		 * it explicitly notifies us that it finished resync.
+		 * Maybe we should finish it up, too? */
+		else if (os.conn >= C_SYNC_SOURCE &&
+			 peer_state.conn == C_CONNECTED) {
+			if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
+				drbd_resync_finished(mdev);
+			return TRUE;
+		}
+	}
+
+	/* peer says his disk is inconsistent, while we think it is uptodate,
+	 * and this happens while the peer still thinks we have a sync going on,
+	 * but we think we are already done with the sync.
+	 * We ignore this to avoid flapping pdsk.
+	 * This should not happen, if the peer is a recent version of drbd. */
+	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
+	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
+		real_peer_disk = D_UP_TO_DATE;
+
+	if (ns.conn == C_WF_REPORT_PARAMS)
+		ns.conn = C_CONNECTED;
 
 	if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
 	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
 		int cr; /* consider resync */
 
 		/* if we established a new connection */
-		cr  = (oconn < C_CONNECTED);
+		cr  = (os.conn < C_CONNECTED);
 		/* if we had an established connection
 		 * and one of the nodes newly attaches a disk */
-		cr |= (oconn == C_CONNECTED &&
+		cr |= (os.conn == C_CONNECTED &&
 		       (peer_state.disk == D_NEGOTIATING ||
-			mdev->state.disk == D_NEGOTIATING));
+			os.disk == D_NEGOTIATING));
 		/* if we have both been inconsistent, and the peer has been
 		 * forced to be UpToDate with --overwrite-data */
 		cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
 		/* if we had been plain connected, and the admin requested to
 		 * start a sync by "invalidate" or "invalidate-remote" */
-		cr |= (oconn == C_CONNECTED &&
+		cr |= (os.conn == C_CONNECTED &&
 		       (peer_state.conn >= C_STARTING_SYNC_S &&
 			peer_state.conn <= C_WF_BITMAP_T));
 
 		if (cr)
-			nconn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
+			ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
 
 		put_ldev(mdev);
-		if (nconn == C_MASK) {
-			nconn = C_CONNECTED;
+		if (ns.conn == C_MASK) {
+			ns.conn = C_CONNECTED;
 			if (mdev->state.disk == D_NEGOTIATING) {
 				drbd_force_state(mdev, NS(disk, D_DISKLESS));
 			} else if (peer_state.disk == D_NEGOTIATING) {
@@ -3244,7 +3370,7 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h)
 	} else {
 		if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
 			return FALSE;
-		D_ASSERT(oconn == C_WF_REPORT_PARAMS);
+		D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
 		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
 		return FALSE;
 	}
@@ -3252,18 +3378,28 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h)
 	}
 
 	spin_lock_irq(&mdev->req_lock);
-	if (mdev->state.conn != oconn)
+	if (mdev->state.i != os.i)
 		goto retry;
 	clear_bit(CONSIDER_RESYNC, &mdev->flags);
-	ns.i = mdev->state.i;
-	ns.conn = nconn;
 	ns.peer = peer_state.role;
 	ns.pdsk = real_peer_disk;
 	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
-	if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
+	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
 		ns.disk = mdev->new_state_tmp.disk;
-
-	rv = _drbd_set_state(mdev, ns, CS_VERBOSE | CS_HARD, NULL);
+	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
+	if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
+	    test_bit(NEW_CUR_UUID, &mdev->flags)) {
+		/* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this
+		   for temporal network outages! */
+		spin_unlock_irq(&mdev->req_lock);
+		dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
+		tl_clear(mdev);
+		drbd_uuid_new_current(mdev);
+		clear_bit(NEW_CUR_UUID, &mdev->flags);
+		drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
+		return FALSE;
+	}
+	rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
 	ns = mdev->state;
 	spin_unlock_irq(&mdev->req_lock);
 
@@ -3272,8 +3408,8 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h)
 		return FALSE;
 	}
 
-	if (oconn > C_WF_REPORT_PARAMS) {
-		if (nconn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
+	if (os.conn > C_WF_REPORT_PARAMS) {
+		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
 		    peer_state.disk != D_NEGOTIATING ) {
 			/* we want resync, peer has not yet decided to sync... */
 			/* Nowadays only used when forcing a node into primary role and
@@ -3290,9 +3426,9 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h)
 	return TRUE;
 }
 
-static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h)
+static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
-	struct p_rs_uuid *p = (struct p_rs_uuid *)h;
+	struct p_rs_uuid *p = &mdev->data.rbuf.rs_uuid;
 
 	wait_event(mdev->misc_wait,
 		   mdev->state.conn == C_WF_SYNC_UUID ||
@@ -3301,10 +3437,6 @@ static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h)
 
 	/* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
 
-	ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
-	if (drbd_recv(mdev, h->payload, h->length) != h->length)
-		return FALSE;
-
 	/* Here the _drbd_uuid_ functions are right, current should
 	   _not_ be rotated into the history */
 	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
@@ -3323,14 +3455,14 @@ static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h)
 enum receive_bitmap_ret { OK, DONE, FAILED };
 
 static enum receive_bitmap_ret
-receive_bitmap_plain(struct drbd_conf *mdev, struct p_header *h,
+receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
 		     unsigned long *buffer, struct bm_xfer_ctx *c)
 {
 	unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
 	unsigned want = num_words * sizeof(long);
 
-	if (want != h->length) {
-		dev_err(DEV, "%s:want (%u) != h->length (%u)\n", __func__, want, h->length);
+	if (want != data_size) {
+		dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
 		return FAILED;
 	}
 	if (want == 0)
@@ -3359,7 +3491,7 @@ recv_bm_rle_bits(struct drbd_conf *mdev,
 	u64 tmp;
 	unsigned long s = c->bit_offset;
 	unsigned long e;
-	int len = p->head.length - (sizeof(*p) - sizeof(p->head));
+	int len = be16_to_cpu(p->head.length) - (sizeof(*p) - sizeof(p->head));
 	int toggle = DCBP_get_start(p);
 	int have;
 	int bits;
@@ -3428,7 +3560,7 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev,
 		const char *direction, struct bm_xfer_ctx *c)
 {
 	/* what would it take to transfer it "plaintext" */
-	unsigned plain = sizeof(struct p_header) *
+	unsigned plain = sizeof(struct p_header80) *
 		((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
 		+ c->bm_words * sizeof(long);
 	unsigned total = c->bytes[0] + c->bytes[1];
@@ -3466,12 +3598,13 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev,
    in order to be agnostic to the 32 vs 64 bits issue.
 
    returns 0 on failure, 1 if we successfully received it. */
-static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h)
+static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
 	struct bm_xfer_ctx c;
 	void *buffer;
 	enum receive_bitmap_ret ret;
 	int ok = FALSE;
+	struct p_header80 *h = &mdev->data.rbuf.header.h80;
 
 	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
 
@@ -3491,39 +3624,39 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h)
 	};
 
 	do {
-		if (h->command == P_BITMAP) {
-			ret = receive_bitmap_plain(mdev, h, buffer, &c);
-		} else if (h->command == P_COMPRESSED_BITMAP) {
+		if (cmd == P_BITMAP) {
+			ret = receive_bitmap_plain(mdev, data_size, buffer, &c);
+		} else if (cmd == P_COMPRESSED_BITMAP) {
 			/* MAYBE: sanity check that we speak proto >= 90,
 			 * and the feature is enabled! */
 			struct p_compressed_bm *p;
 
-			if (h->length > BM_PACKET_PAYLOAD_BYTES) {
+			if (data_size > BM_PACKET_PAYLOAD_BYTES) {
 				dev_err(DEV, "ReportCBitmap packet too large\n");
 				goto out;
 			}
 			/* use the page buff */
 			p = buffer;
 			memcpy(p, h, sizeof(*h));
-			if (drbd_recv(mdev, p->head.payload, h->length) != h->length)
+			if (drbd_recv(mdev, p->head.payload, data_size) != data_size)
 				goto out;
-			if (p->head.length <= (sizeof(*p) - sizeof(p->head))) {
-				dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", p->head.length);
+			if (data_size <= (sizeof(*p) - sizeof(p->head))) {
+				dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
 				return FAILED;
 			}
 			ret = decode_bitmap_c(mdev, p, &c);
 		} else {
-			dev_warn(DEV, "receive_bitmap: h->command neither ReportBitMap nor ReportCBitMap (is 0x%x)", h->command);
+			dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
 			goto out;
 		}
 
-		c.packets[h->command == P_BITMAP]++;
-		c.bytes[h->command == P_BITMAP] += sizeof(struct p_header) + h->length;
+		c.packets[cmd == P_BITMAP]++;
+		c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size;
 
 		if (ret != OK)
 			break;
 
-		if (!drbd_recv_header(mdev, h))
+		if (!drbd_recv_header(mdev, &cmd, &data_size))
 			goto out;
 	} while (ret == OK);
 	if (ret == FAILED)
@@ -3554,17 +3687,16 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h)
 	return ok;
 }
 
-static int receive_skip_(struct drbd_conf *mdev, struct p_header *h, int silent)
+static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
 	/* TODO zero copy sink :) */
 	static char sink[128];
 	int size, want, r;
 
-	if (!silent)
-		dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
-		     h->command, h->length);
+	dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
+		 cmd, data_size);
 
-	size = h->length;
+	size = data_size;
 	while (size > 0) {
 		want = min_t(int, size, sizeof(sink));
 		r = drbd_recv(mdev, sink, want);
@@ -3574,17 +3706,7 @@ static int receive_skip_(struct drbd_conf *mdev, struct p_header *h, int silent)
 	return size == 0;
 }
 
-static int receive_skip(struct drbd_conf *mdev, struct p_header *h)
-{
-	return receive_skip_(mdev, h, 0);
-}
-
-static int receive_skip_silent(struct drbd_conf *mdev, struct p_header *h)
-{
-	return receive_skip_(mdev, h, 1);
-}
-
-static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h)
+static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
 	if (mdev->state.disk >= D_INCONSISTENT)
 		drbd_kick_lo(mdev);
@@ -3596,108 +3718,94 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h)
3596 return TRUE; 3718 return TRUE;
3597} 3719}
3598 3720
3599typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header *); 3721typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive);
3600 3722
3601static drbd_cmd_handler_f drbd_default_handler[] = { 3723struct data_cmd {
3602 [P_DATA] = receive_Data, 3724 int expect_payload;
3603 [P_DATA_REPLY] = receive_DataReply, 3725 size_t pkt_size;
3604 [P_RS_DATA_REPLY] = receive_RSDataReply, 3726 drbd_cmd_handler_f function;
3605 [P_BARRIER] = receive_Barrier, 3727};
3606 [P_BITMAP] = receive_bitmap, 3728
3607 [P_COMPRESSED_BITMAP] = receive_bitmap, 3729static struct data_cmd drbd_cmd_handler[] = {
3608 [P_UNPLUG_REMOTE] = receive_UnplugRemote, 3730 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
3609 [P_DATA_REQUEST] = receive_DataRequest, 3731 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
3610 [P_SYNC_PARAM] = receive_SyncParam, 3732 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply },
3611 [P_SYNC_PARAM] = receive_SyncParam, 3733 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier },
3612 [P_SYNC_PARAM89] = receive_SyncParam, 3734 [P_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap },
3613 [P_PROTOCOL] = receive_protocol, 3735 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap },
3614 [P_UUIDS] = receive_uuids, 3736 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header80), receive_UnplugRemote },
3615 [P_SIZES] = receive_sizes, 3737 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3616 [P_STATE] = receive_state, 3738 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3617 [P_STATE_CHG_REQ] = receive_req_state, 3739 [P_SYNC_PARAM] = { 1, sizeof(struct p_header80), receive_SyncParam },
3618 [P_SYNC_UUID] = receive_sync_uuid, 3740 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header80), receive_SyncParam },
3619 [P_OV_REQUEST] = receive_DataRequest, 3741 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
3620 [P_OV_REPLY] = receive_DataRequest, 3742 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
3621 [P_CSUM_RS_REQUEST] = receive_DataRequest, 3743 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
3622 [P_DELAY_PROBE] = receive_skip_silent, 3744 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
3745 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
3746 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
3747 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3748 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3749 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3750 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
3623 /* anything missing from this table is in 3751 /* anything missing from this table is in
3624 * the asender_tbl, see get_asender_cmd */ 3752 * the asender_tbl, see get_asender_cmd */
3625 [P_MAX_CMD] = NULL, 3753 [P_MAX_CMD] = { 0, 0, NULL },
3626}; 3754};
3627 3755
3628 static drbd_cmd_handler_f *drbd_cmd_handler = drbd_default_handler; 3756 /* All handler functions that expect a sub-header get that sub-header in
3629static drbd_cmd_handler_f *drbd_opt_cmd_handler; 3757 mdev->data.rbuf.header.head.payload.
3758
3759 Usually in mdev->data.rbuf.header.head the callback can find the usual
3760 p_header, but it may not rely on that, since there is also p_header95.
3761 */
3630 3762
3631static void drbdd(struct drbd_conf *mdev) 3763static void drbdd(struct drbd_conf *mdev)
3632{ 3764{
3633 drbd_cmd_handler_f handler; 3765 union p_header *header = &mdev->data.rbuf.header;
3634 struct p_header *header = &mdev->data.rbuf.header; 3766 unsigned int packet_size;
3767 enum drbd_packets cmd;
3768 size_t shs; /* sub header size */
3769 int rv;
3635 3770
3636 while (get_t_state(&mdev->receiver) == Running) { 3771 while (get_t_state(&mdev->receiver) == Running) {
3637 drbd_thread_current_set_cpu(mdev); 3772 drbd_thread_current_set_cpu(mdev);
3638 if (!drbd_recv_header(mdev, header)) { 3773 if (!drbd_recv_header(mdev, &cmd, &packet_size))
3639 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); 3774 goto err_out;
3640 break;
3641 }
3642 3775
3643 if (header->command < P_MAX_CMD) 3776 if (unlikely(cmd >= P_MAX_CMD || !drbd_cmd_handler[cmd].function)) {
3644 handler = drbd_cmd_handler[header->command]; 3777 dev_err(DEV, "unknown packet type %d, l: %d!\n", cmd, packet_size);
3645 else if (P_MAY_IGNORE < header->command 3778 goto err_out;
3646 && header->command < P_MAX_OPT_CMD) 3779 }
3647 handler = drbd_opt_cmd_handler[header->command-P_MAY_IGNORE];
3648 else if (header->command > P_MAX_OPT_CMD)
3649 handler = receive_skip;
3650 else
3651 handler = NULL;
3652 3780
3653 if (unlikely(!handler)) { 3781 shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header);
3654 dev_err(DEV, "unknown packet type %d, l: %d!\n", 3782 rv = drbd_recv(mdev, &header->h80.payload, shs);
3655 header->command, header->length); 3783 if (unlikely(rv != shs)) {
3656 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); 3784 dev_err(DEV, "short read while reading sub header: rv=%d\n", rv);
3657 break; 3785 goto err_out;
3658 } 3786 }
3659 if (unlikely(!handler(mdev, header))) { 3787
3660 dev_err(DEV, "error receiving %s, l: %d!\n", 3788 if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) {
3661 cmdname(header->command), header->length); 3789 dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size);
3662 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); 3790 goto err_out;
3663 break;
3664 } 3791 }
3665 }
3666}
3667 3792
3668static void drbd_fail_pending_reads(struct drbd_conf *mdev) 3793 rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs);
3669{
3670 struct hlist_head *slot;
3671 struct hlist_node *pos;
3672 struct hlist_node *tmp;
3673 struct drbd_request *req;
3674 int i;
3675 3794
3676 /* 3795 if (unlikely(!rv)) {
3677 * Application READ requests 3796 dev_err(DEV, "error receiving %s, l: %d!\n",
3678 */ 3797 cmdname(cmd), packet_size);
3679 spin_lock_irq(&mdev->req_lock); 3798 goto err_out;
3680 for (i = 0; i < APP_R_HSIZE; i++) {
3681 slot = mdev->app_reads_hash+i;
3682 hlist_for_each_entry_safe(req, pos, tmp, slot, colision) {
3683 /* it may (but should not any longer!)
3684 * be on the work queue; if that assert triggers,
3685 * we need to also grab the
3686 * spin_lock_irq(&mdev->data.work.q_lock);
3687 * and list_del_init here. */
3688 D_ASSERT(list_empty(&req->w.list));
3689 /* It would be nice to complete outside of spinlock.
3690 * But this is easier for now. */
3691 _req_mod(req, connection_lost_while_pending);
3692 } 3799 }
3693 } 3800 }
3694 for (i = 0; i < APP_R_HSIZE; i++)
3695 if (!hlist_empty(mdev->app_reads_hash+i))
3696 dev_warn(DEV, "ASSERT FAILED: app_reads_hash[%d].first: "
3697 "%p, should be NULL\n", i, mdev->app_reads_hash[i].first);
3698 3801
3699 memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); 3802 if (0) {
3700 spin_unlock_irq(&mdev->req_lock); 3803 err_out:
3804 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
3805 }
3806 /* If we leave here, we probably want to update at least the
3807 * "Connected" indicator on stable storage. Do so explicitly here. */
3808 drbd_md_sync(mdev);
3701} 3809}
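
The rewrite above replaces two parallel handler arrays with one table that also records each packet's fixed on-wire size and whether a trailing payload is legal, so drbdd() can validate a packet once before any handler runs; the if (0) { err_out: ... } block funnels every failure path through a single C_PROTOCOL_ERROR transition followed by an explicit drbd_md_sync(). A standalone sketch of the table-driven shape, with invented packet names and sizes:

#include <stddef.h>

enum pkt { P_FOO, P_BAR, P_MAX };
struct ctx;
typedef int (*handler_f)(struct ctx *, enum pkt, size_t payload);

static int recv_foo(struct ctx *c, enum pkt p, size_t n) { (void)c; (void)p; (void)n; return 1; }
static int recv_bar(struct ctx *c, enum pkt p, size_t n) { (void)c; (void)p; (void)n; return 1; }

static const struct {
	int expect_payload;	/* may bytes follow the fixed part? */
	size_t pkt_size;	/* fixed part, header included */
	handler_f function;
} tbl[P_MAX] = {
	[P_FOO] = { 0, 8, recv_foo },
	[P_BAR] = { 1, 8, recv_bar },
};

static int dispatch(struct ctx *c, enum pkt cmd, size_t packet_size)
{
	if (cmd >= P_MAX || !tbl[cmd].function)
		return -1;			/* unknown packet type */
	if (packet_size < tbl[cmd].pkt_size)
		return -1;			/* short packet */
	if (packet_size > tbl[cmd].pkt_size && !tbl[cmd].expect_payload)
		return -1;			/* unexpected payload */
	return tbl[cmd].function(c, cmd, packet_size - tbl[cmd].pkt_size) ? 0 : -1;
}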
3702 3810
3703void drbd_flush_workqueue(struct drbd_conf *mdev) 3811void drbd_flush_workqueue(struct drbd_conf *mdev)
@@ -3710,6 +3818,36 @@ void drbd_flush_workqueue(struct drbd_conf *mdev)
3710 wait_for_completion(&barr.done); 3818 wait_for_completion(&barr.done);
3711} 3819}
3712 3820
3821void drbd_free_tl_hash(struct drbd_conf *mdev)
3822{
3823 struct hlist_head *h;
3824
3825 spin_lock_irq(&mdev->req_lock);
3826
3827 if (!mdev->tl_hash || mdev->state.conn != C_STANDALONE) {
3828 spin_unlock_irq(&mdev->req_lock);
3829 return;
3830 }
3831 /* paranoia code */
3832 for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++)
3833 if (h->first)
3834 dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n",
3835 (int)(h - mdev->ee_hash), h->first);
3836 kfree(mdev->ee_hash);
3837 mdev->ee_hash = NULL;
3838 mdev->ee_hash_s = 0;
3839
3840 /* paranoia code */
3841 for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++)
3842 if (h->first)
3843 dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n",
3844 (int)(h - mdev->tl_hash), h->first);
3845 kfree(mdev->tl_hash);
3846 mdev->tl_hash = NULL;
3847 mdev->tl_hash_s = 0;
3848 spin_unlock_irq(&mdev->req_lock);
3849}
3850
3713static void drbd_disconnect(struct drbd_conf *mdev) 3851static void drbd_disconnect(struct drbd_conf *mdev)
3714{ 3852{
3715 enum drbd_fencing_p fp; 3853 enum drbd_fencing_p fp;
@@ -3727,6 +3865,7 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3727 drbd_thread_stop(&mdev->asender); 3865 drbd_thread_stop(&mdev->asender);
3728 drbd_free_sock(mdev); 3866 drbd_free_sock(mdev);
3729 3867
3868 /* wait for current activity to cease. */
3730 spin_lock_irq(&mdev->req_lock); 3869 spin_lock_irq(&mdev->req_lock);
3731 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee); 3870 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
3732 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee); 3871 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
@@ -3751,7 +3890,6 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3751 3890
3752 /* make sure syncer is stopped and w_resume_next_sg queued */ 3891 /* make sure syncer is stopped and w_resume_next_sg queued */
3753 del_timer_sync(&mdev->resync_timer); 3892 del_timer_sync(&mdev->resync_timer);
3754 set_bit(STOP_SYNC_TIMER, &mdev->flags);
3755 resync_timer_fn((unsigned long)mdev); 3893 resync_timer_fn((unsigned long)mdev);
3756 3894
3757 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, 3895 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
@@ -3766,11 +3904,9 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3766 kfree(mdev->p_uuid); 3904 kfree(mdev->p_uuid);
3767 mdev->p_uuid = NULL; 3905 mdev->p_uuid = NULL;
3768 3906
3769 if (!mdev->state.susp) 3907 if (!is_susp(mdev->state))
3770 tl_clear(mdev); 3908 tl_clear(mdev);
3771 3909
3772 drbd_fail_pending_reads(mdev);
3773
3774 dev_info(DEV, "Connection closed\n"); 3910 dev_info(DEV, "Connection closed\n");
3775 3911
3776 drbd_md_sync(mdev); 3912 drbd_md_sync(mdev);
@@ -3781,12 +3917,8 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3781 put_ldev(mdev); 3917 put_ldev(mdev);
3782 } 3918 }
3783 3919
3784 if (mdev->state.role == R_PRIMARY) { 3920 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
3785 if (fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) { 3921 drbd_try_outdate_peer_async(mdev);
3786 enum drbd_disk_state nps = drbd_try_outdate_peer(mdev);
3787 drbd_request_state(mdev, NS(pdsk, nps));
3788 }
3789 }
3790 3922
3791 spin_lock_irq(&mdev->req_lock); 3923 spin_lock_irq(&mdev->req_lock);
3792 os = mdev->state; 3924 os = mdev->state;
@@ -3799,32 +3931,14 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3799 spin_unlock_irq(&mdev->req_lock); 3931 spin_unlock_irq(&mdev->req_lock);
3800 3932
3801 if (os.conn == C_DISCONNECTING) { 3933 if (os.conn == C_DISCONNECTING) {
3802 struct hlist_head *h; 3934 wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0);
3803 wait_event(mdev->misc_wait, atomic_read(&mdev->net_cnt) == 0);
3804 3935
3805 /* we must not free the tl_hash 3936 if (!is_susp(mdev->state)) {
3806 * while application io is still on the fly */ 3937 /* we must not free the tl_hash
3807 wait_event(mdev->misc_wait, atomic_read(&mdev->ap_bio_cnt) == 0); 3938 * while application io is still on the fly */
3808 3939 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
3809 spin_lock_irq(&mdev->req_lock); 3940 drbd_free_tl_hash(mdev);
3810 /* paranoia code */ 3941 }
3811 for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++)
3812 if (h->first)
3813 dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n",
3814 (int)(h - mdev->ee_hash), h->first);
3815 kfree(mdev->ee_hash);
3816 mdev->ee_hash = NULL;
3817 mdev->ee_hash_s = 0;
3818
3819 /* paranoia code */
3820 for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++)
3821 if (h->first)
3822 dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n",
3823 (int)(h - mdev->tl_hash), h->first);
3824 kfree(mdev->tl_hash);
3825 mdev->tl_hash = NULL;
3826 mdev->tl_hash_s = 0;
3827 spin_unlock_irq(&mdev->req_lock);
3828 3942
3829 crypto_free_hash(mdev->cram_hmac_tfm); 3943 crypto_free_hash(mdev->cram_hmac_tfm);
3830 mdev->cram_hmac_tfm = NULL; 3944 mdev->cram_hmac_tfm = NULL;
@@ -3844,6 +3958,9 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3844 i = drbd_release_ee(mdev, &mdev->net_ee); 3958 i = drbd_release_ee(mdev, &mdev->net_ee);
3845 if (i) 3959 if (i)
3846 dev_info(DEV, "net_ee not empty, killed %u entries\n", i); 3960 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
3961 i = atomic_read(&mdev->pp_in_use_by_net);
3962 if (i)
3963 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
3847 i = atomic_read(&mdev->pp_in_use); 3964 i = atomic_read(&mdev->pp_in_use);
3848 if (i) 3965 if (i)
3849 dev_info(DEV, "pp_in_use = %d, expected 0\n", i); 3966 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
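
The disconnect path now frees the transfer-log hashes only through drbd_free_tl_hash(), after waiting for ap_bio_cnt to drop to zero and only while not suspended, since a request still in flight could index into those tables. A pthread sketch of that "wait until in-flight work drains, then tear down" ordering (all names here are illustrative):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  drained = PTHREAD_COND_INITIALIZER;
static int inflight;		/* analogue of ap_bio_cnt */
static void *tl_hash;		/* analogue of mdev->tl_hash */

static void io_done(void)	/* called as each request completes */
{
	pthread_mutex_lock(&lock);
	if (--inflight == 0)
		pthread_cond_broadcast(&drained);
	pthread_mutex_unlock(&lock);
}

static void teardown(void)	/* analogue of the C_DISCONNECTING branch */
{
	pthread_mutex_lock(&lock);
	while (inflight > 0)	/* wait_event(..., !atomic_read(&ap_bio_cnt)) */
		pthread_cond_wait(&drained, &lock);
	free(tl_hash);		/* nothing can reach into it any more */
	tl_hash = NULL;
	pthread_mutex_unlock(&lock);
}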
@@ -3887,7 +4004,7 @@ static int drbd_send_handshake(struct drbd_conf *mdev)
3887 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); 4004 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
3888 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); 4005 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
3889 ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE, 4006 ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE,
3890 (struct p_header *)p, sizeof(*p), 0 ); 4007 (struct p_header80 *)p, sizeof(*p), 0 );
3891 mutex_unlock(&mdev->data.mutex); 4008 mutex_unlock(&mdev->data.mutex);
3892 return ok; 4009 return ok;
3893} 4010}
@@ -3903,27 +4020,28 @@ static int drbd_do_handshake(struct drbd_conf *mdev)
3903{ 4020{
3904 /* ASSERT current == mdev->receiver ... */ 4021 /* ASSERT current == mdev->receiver ... */
3905 struct p_handshake *p = &mdev->data.rbuf.handshake; 4022 struct p_handshake *p = &mdev->data.rbuf.handshake;
3906 const int expect = sizeof(struct p_handshake) 4023 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
3907 -sizeof(struct p_header); 4024 unsigned int length;
4025 enum drbd_packets cmd;
3908 int rv; 4026 int rv;
3909 4027
3910 rv = drbd_send_handshake(mdev); 4028 rv = drbd_send_handshake(mdev);
3911 if (!rv) 4029 if (!rv)
3912 return 0; 4030 return 0;
3913 4031
3914 rv = drbd_recv_header(mdev, &p->head); 4032 rv = drbd_recv_header(mdev, &cmd, &length);
3915 if (!rv) 4033 if (!rv)
3916 return 0; 4034 return 0;
3917 4035
3918 if (p->head.command != P_HAND_SHAKE) { 4036 if (cmd != P_HAND_SHAKE) {
3919 dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n", 4037 dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n",
3920 cmdname(p->head.command), p->head.command); 4038 cmdname(cmd), cmd);
3921 return -1; 4039 return -1;
3922 } 4040 }
3923 4041
3924 if (p->head.length != expect) { 4042 if (length != expect) {
3925 dev_err(DEV, "expected HandShake length: %u, received: %u\n", 4043 dev_err(DEV, "expected HandShake length: %u, received: %u\n",
3926 expect, p->head.length); 4044 expect, length);
3927 return -1; 4045 return -1;
3928 } 4046 }
3929 4047
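
drbd_do_handshake now takes the command and length from drbd_recv_header() rather than reading p->head directly (which becomes ambiguous once p_header95 exists alongside p_header80), and it rejects any packet whose type or length deviates from the single fixed handshake layout. A compact sketch of that validate-then-parse step, with an invented wire format:

#include <stdint.h>
#include <string.h>

struct hs_hdr  { uint16_t cmd; uint16_t len; };
struct hs_body { uint32_t proto_min, proto_max; };

#define CMD_HANDSHAKE 0xfe	/* illustrative command code */

/* Fill *out only if both the type and the exact length match. */
static int parse_handshake(const struct hs_hdr *h, const void *payload,
			   struct hs_body *out)
{
	if (h->cmd != CMD_HANDSHAKE)
		return -1;		/* wrong packet type */
	if (h->len != sizeof(*out))
		return -1;		/* wrong length: protocol error */
	memcpy(out, payload, sizeof(*out));
	return 0;
}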
@@ -3981,10 +4099,11 @@ static int drbd_do_auth(struct drbd_conf *mdev)
3981 char *response = NULL; 4099 char *response = NULL;
3982 char *right_response = NULL; 4100 char *right_response = NULL;
3983 char *peers_ch = NULL; 4101 char *peers_ch = NULL;
3984 struct p_header p;
3985 unsigned int key_len = strlen(mdev->net_conf->shared_secret); 4102 unsigned int key_len = strlen(mdev->net_conf->shared_secret);
3986 unsigned int resp_size; 4103 unsigned int resp_size;
3987 struct hash_desc desc; 4104 struct hash_desc desc;
4105 enum drbd_packets cmd;
4106 unsigned int length;
3988 int rv; 4107 int rv;
3989 4108
3990 desc.tfm = mdev->cram_hmac_tfm; 4109 desc.tfm = mdev->cram_hmac_tfm;
@@ -4004,33 +4123,33 @@ static int drbd_do_auth(struct drbd_conf *mdev)
4004 if (!rv) 4123 if (!rv)
4005 goto fail; 4124 goto fail;
4006 4125
4007 rv = drbd_recv_header(mdev, &p); 4126 rv = drbd_recv_header(mdev, &cmd, &length);
4008 if (!rv) 4127 if (!rv)
4009 goto fail; 4128 goto fail;
4010 4129
4011 if (p.command != P_AUTH_CHALLENGE) { 4130 if (cmd != P_AUTH_CHALLENGE) {
4012 dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n", 4131 dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n",
4013 cmdname(p.command), p.command); 4132 cmdname(cmd), cmd);
4014 rv = 0; 4133 rv = 0;
4015 goto fail; 4134 goto fail;
4016 } 4135 }
4017 4136
4018 if (p.length > CHALLENGE_LEN*2) { 4137 if (length > CHALLENGE_LEN * 2) {
4019 dev_err(DEV, "AuthChallenge payload too big.\n"); 4138 dev_err(DEV, "AuthChallenge payload too big.\n");
4020 rv = -1; 4139 rv = -1;
4021 goto fail; 4140 goto fail;
4022 } 4141 }
4023 4142
4024 peers_ch = kmalloc(p.length, GFP_NOIO); 4143 peers_ch = kmalloc(length, GFP_NOIO);
4025 if (peers_ch == NULL) { 4144 if (peers_ch == NULL) {
4026 dev_err(DEV, "kmalloc of peers_ch failed\n"); 4145 dev_err(DEV, "kmalloc of peers_ch failed\n");
4027 rv = -1; 4146 rv = -1;
4028 goto fail; 4147 goto fail;
4029 } 4148 }
4030 4149
4031 rv = drbd_recv(mdev, peers_ch, p.length); 4150 rv = drbd_recv(mdev, peers_ch, length);
4032 4151
4033 if (rv != p.length) { 4152 if (rv != length) {
4034 dev_err(DEV, "short read AuthChallenge: l=%u\n", rv); 4153 dev_err(DEV, "short read AuthChallenge: l=%u\n", rv);
4035 rv = 0; 4154 rv = 0;
4036 goto fail; 4155 goto fail;
@@ -4045,7 +4164,7 @@ static int drbd_do_auth(struct drbd_conf *mdev)
4045 } 4164 }
4046 4165
4047 sg_init_table(&sg, 1); 4166 sg_init_table(&sg, 1);
4048 sg_set_buf(&sg, peers_ch, p.length); 4167 sg_set_buf(&sg, peers_ch, length);
4049 4168
4050 rv = crypto_hash_digest(&desc, &sg, sg.length, response); 4169 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4051 if (rv) { 4170 if (rv) {
@@ -4058,18 +4177,18 @@ static int drbd_do_auth(struct drbd_conf *mdev)
4058 if (!rv) 4177 if (!rv)
4059 goto fail; 4178 goto fail;
4060 4179
4061 rv = drbd_recv_header(mdev, &p); 4180 rv = drbd_recv_header(mdev, &cmd, &length);
4062 if (!rv) 4181 if (!rv)
4063 goto fail; 4182 goto fail;
4064 4183
4065 if (p.command != P_AUTH_RESPONSE) { 4184 if (cmd != P_AUTH_RESPONSE) {
4066 dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n", 4185 dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n",
4067 cmdname(p.command), p.command); 4186 cmdname(cmd), cmd);
4068 rv = 0; 4187 rv = 0;
4069 goto fail; 4188 goto fail;
4070 } 4189 }
4071 4190
4072 if (p.length != resp_size) { 4191 if (length != resp_size) {
4073 dev_err(DEV, "AuthResponse payload of wrong size\n"); 4192 dev_err(DEV, "AuthResponse payload of wrong size\n");
4074 rv = 0; 4193 rv = 0;
4075 goto fail; 4194 goto fail;
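
The exchange above is CRAM-style challenge/response authentication: receive a bounded challenge, HMAC it with the shared secret, send the digest back, and check the peer's response to our own challenge the same way. A userspace sketch of the verification half, using OpenSSL's HMAC as a stand-in for the kernel crypto_hash API seen in the diff:

#include <openssl/hmac.h>
#include <string.h>

#define CHALLENGE_LEN 64

/* Return 1 iff `response` equals HMAC-SHA1(secret, challenge). */
static int check_response(const unsigned char *secret, int secret_len,
			  const unsigned char *challenge, size_t chal_len,
			  const unsigned char *response, unsigned int resp_len)
{
	unsigned char expected[EVP_MAX_MD_SIZE];
	unsigned int expected_len;

	if (chal_len > CHALLENGE_LEN * 2)	/* bound it, as the driver does */
		return 0;
	if (!HMAC(EVP_sha1(), secret, secret_len,
		  challenge, chal_len, expected, &expected_len))
		return 0;
	return resp_len == expected_len &&
	       memcmp(expected, response, expected_len) == 0;
}

The driver likewise compares digests with memcmp; a hardened userspace port would prefer a constant-time comparison.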
@@ -4154,7 +4273,7 @@ int drbdd_init(struct drbd_thread *thi)
4154 4273
4155/* ********* acknowledge sender ******** */ 4274/* ********* acknowledge sender ******** */
4156 4275
4157static int got_RqSReply(struct drbd_conf *mdev, struct p_header *h) 4276static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h)
4158{ 4277{
4159 struct p_req_state_reply *p = (struct p_req_state_reply *)h; 4278 struct p_req_state_reply *p = (struct p_req_state_reply *)h;
4160 4279
@@ -4172,13 +4291,13 @@ static int got_RqSReply(struct drbd_conf *mdev, struct p_header *h)
4172 return TRUE; 4291 return TRUE;
4173} 4292}
4174 4293
4175static int got_Ping(struct drbd_conf *mdev, struct p_header *h) 4294static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h)
4176{ 4295{
4177 return drbd_send_ping_ack(mdev); 4296 return drbd_send_ping_ack(mdev);
4178 4297
4179} 4298}
4180 4299
4181static int got_PingAck(struct drbd_conf *mdev, struct p_header *h) 4300static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h)
4182{ 4301{
4183 /* restore idle timeout */ 4302 /* restore idle timeout */
4184 mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; 4303 mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
@@ -4188,7 +4307,7 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header *h)
4188 return TRUE; 4307 return TRUE;
4189} 4308}
4190 4309
4191static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h) 4310static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h)
4192{ 4311{
4193 struct p_block_ack *p = (struct p_block_ack *)h; 4312 struct p_block_ack *p = (struct p_block_ack *)h;
4194 sector_t sector = be64_to_cpu(p->sector); 4313 sector_t sector = be64_to_cpu(p->sector);
@@ -4198,11 +4317,15 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h)
4198 4317
4199 update_peer_seq(mdev, be32_to_cpu(p->seq_num)); 4318 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4200 4319
4201 drbd_rs_complete_io(mdev, sector); 4320 if (get_ldev(mdev)) {
4202 drbd_set_in_sync(mdev, sector, blksize); 4321 drbd_rs_complete_io(mdev, sector);
4203 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ 4322 drbd_set_in_sync(mdev, sector, blksize);
4204 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); 4323 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4324 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4325 put_ldev(mdev);
4326 }
4205 dec_rs_pending(mdev); 4327 dec_rs_pending(mdev);
4328 atomic_add(blksize >> 9, &mdev->rs_sect_in);
4206 4329
4207 return TRUE; 4330 return TRUE;
4208} 4331}
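
got_IsInSync now brackets all work that touches the backing device in get_ldev()/put_ldev(), so an acknowledgement arriving during detach can no longer dereference a disk that is going away; only the disk-independent bookkeeping runs unconditionally. A minimal refcount-guard sketch of the pattern (names illustrative, not the driver's implementation):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int ldev_refs = 1;	/* 1 while attached; detach drops it */

static bool get_ldev(void)		/* pin the disk, or fail if gone */
{
	int v = atomic_load(&ldev_refs);
	while (v > 0)
		if (atomic_compare_exchange_weak(&ldev_refs, &v, v + 1))
			return true;
	return false;
}

static void put_ldev(void)
{
	atomic_fetch_sub(&ldev_refs, 1);
}

static void on_in_sync_ack(void)
{
	if (get_ldev()) {
		/* ... update bitmap and resync statistics ... */
		put_ldev();
	}
	/* bookkeeping that needs no disk access still runs here */
}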
@@ -4258,7 +4381,7 @@ static int validate_req_change_req_state(struct drbd_conf *mdev,
4258 return TRUE; 4381 return TRUE;
4259} 4382}
4260 4383
4261static int got_BlockAck(struct drbd_conf *mdev, struct p_header *h) 4384static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h)
4262{ 4385{
4263 struct p_block_ack *p = (struct p_block_ack *)h; 4386 struct p_block_ack *p = (struct p_block_ack *)h;
4264 sector_t sector = be64_to_cpu(p->sector); 4387 sector_t sector = be64_to_cpu(p->sector);
@@ -4298,7 +4421,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header *h)
4298 _ack_id_to_req, __func__ , what); 4421 _ack_id_to_req, __func__ , what);
4299} 4422}
4300 4423
4301static int got_NegAck(struct drbd_conf *mdev, struct p_header *h) 4424static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h)
4302{ 4425{
4303 struct p_block_ack *p = (struct p_block_ack *)h; 4426 struct p_block_ack *p = (struct p_block_ack *)h;
4304 sector_t sector = be64_to_cpu(p->sector); 4427 sector_t sector = be64_to_cpu(p->sector);
@@ -4318,7 +4441,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header *h)
4318 _ack_id_to_req, __func__ , neg_acked); 4441 _ack_id_to_req, __func__ , neg_acked);
4319} 4442}
4320 4443
4321static int got_NegDReply(struct drbd_conf *mdev, struct p_header *h) 4444static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h)
4322{ 4445{
4323 struct p_block_ack *p = (struct p_block_ack *)h; 4446 struct p_block_ack *p = (struct p_block_ack *)h;
4324 sector_t sector = be64_to_cpu(p->sector); 4447 sector_t sector = be64_to_cpu(p->sector);
@@ -4331,7 +4454,7 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header *h)
4331 _ar_id_to_req, __func__ , neg_acked); 4454 _ar_id_to_req, __func__ , neg_acked);
4332} 4455}
4333 4456
4334static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h) 4457static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h)
4335{ 4458{
4336 sector_t sector; 4459 sector_t sector;
4337 int size; 4460 int size;
@@ -4353,7 +4476,7 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h)
4353 return TRUE; 4476 return TRUE;
4354} 4477}
4355 4478
4356static int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h) 4479static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h)
4357{ 4480{
4358 struct p_barrier_ack *p = (struct p_barrier_ack *)h; 4481 struct p_barrier_ack *p = (struct p_barrier_ack *)h;
4359 4482
@@ -4362,7 +4485,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h)
4362 return TRUE; 4485 return TRUE;
4363} 4486}
4364 4487
4365static int got_OVResult(struct drbd_conf *mdev, struct p_header *h) 4488static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h)
4366{ 4489{
4367 struct p_block_ack *p = (struct p_block_ack *)h; 4490 struct p_block_ack *p = (struct p_block_ack *)h;
4368 struct drbd_work *w; 4491 struct drbd_work *w;
@@ -4379,6 +4502,9 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header *h)
4379 else 4502 else
4380 ov_oos_print(mdev); 4503 ov_oos_print(mdev);
4381 4504
4505 if (!get_ldev(mdev))
4506 return TRUE;
4507
4382 drbd_rs_complete_io(mdev, sector); 4508 drbd_rs_complete_io(mdev, sector);
4383 dec_rs_pending(mdev); 4509 dec_rs_pending(mdev);
4384 4510
@@ -4393,18 +4519,18 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header *h)
4393 drbd_resync_finished(mdev); 4519 drbd_resync_finished(mdev);
4394 } 4520 }
4395 } 4521 }
4522 put_ldev(mdev);
4396 return TRUE; 4523 return TRUE;
4397} 4524}
4398 4525
4399static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header *h) 4526static int got_skip(struct drbd_conf *mdev, struct p_header80 *h)
4400{ 4527{
4401 /* IGNORE */
4402 return TRUE; 4528 return TRUE;
4403} 4529}
4404 4530
4405struct asender_cmd { 4531struct asender_cmd {
4406 size_t pkt_size; 4532 size_t pkt_size;
4407 int (*process)(struct drbd_conf *mdev, struct p_header *h); 4533 int (*process)(struct drbd_conf *mdev, struct p_header80 *h);
4408}; 4534};
4409 4535
4410static struct asender_cmd *get_asender_cmd(int cmd) 4536static struct asender_cmd *get_asender_cmd(int cmd)
@@ -4413,8 +4539,8 @@ static struct asender_cmd *get_asender_cmd(int cmd)
4413 /* anything missing from this table is in 4539 /* anything missing from this table is in
4414 * the drbd_cmd_handler (drbd_default_handler) table, 4540 * the drbd_cmd_handler (drbd_default_handler) table,
4415 * see the beginning of drbdd() */ 4541 * see the beginning of drbdd() */
4416 [P_PING] = { sizeof(struct p_header), got_Ping }, 4542 [P_PING] = { sizeof(struct p_header80), got_Ping },
4417 [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, 4543 [P_PING_ACK] = { sizeof(struct p_header80), got_PingAck },
4418 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 4544 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4419 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 4545 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4420 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 4546 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
@@ -4426,7 +4552,7 @@ static struct asender_cmd *get_asender_cmd(int cmd)
4426 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, 4552 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4427 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, 4553 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4428 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, 4554 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
4429 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe), got_something_to_ignore_m }, 4555 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
4430 [P_MAX_CMD] = { 0, NULL }, 4556 [P_MAX_CMD] = { 0, NULL },
4431 }; 4557 };
4432 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) 4558 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
@@ -4437,13 +4563,13 @@ static struct asender_cmd *get_asender_cmd(int cmd)
4437int drbd_asender(struct drbd_thread *thi) 4563int drbd_asender(struct drbd_thread *thi)
4438{ 4564{
4439 struct drbd_conf *mdev = thi->mdev; 4565 struct drbd_conf *mdev = thi->mdev;
4440 struct p_header *h = &mdev->meta.rbuf.header; 4566 struct p_header80 *h = &mdev->meta.rbuf.header.h80;
4441 struct asender_cmd *cmd = NULL; 4567 struct asender_cmd *cmd = NULL;
4442 4568
4443 int rv, len; 4569 int rv, len;
4444 void *buf = h; 4570 void *buf = h;
4445 int received = 0; 4571 int received = 0;
4446 int expect = sizeof(struct p_header); 4572 int expect = sizeof(struct p_header80);
4447 int empty; 4573 int empty;
4448 4574
4449 sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); 4575 sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
@@ -4467,10 +4593,8 @@ int drbd_asender(struct drbd_thread *thi)
4467 while (1) { 4593 while (1) {
4468 clear_bit(SIGNAL_ASENDER, &mdev->flags); 4594 clear_bit(SIGNAL_ASENDER, &mdev->flags);
4469 flush_signals(current); 4595 flush_signals(current);
4470 if (!drbd_process_done_ee(mdev)) { 4596 if (!drbd_process_done_ee(mdev))
4471 dev_err(DEV, "process_done_ee() = NOT_OK\n");
4472 goto reconnect; 4597 goto reconnect;
4473 }
4474 /* to avoid race with newly queued ACKs */ 4598 /* to avoid race with newly queued ACKs */
4475 set_bit(SIGNAL_ASENDER, &mdev->flags); 4599 set_bit(SIGNAL_ASENDER, &mdev->flags);
4476 spin_lock_irq(&mdev->req_lock); 4600 spin_lock_irq(&mdev->req_lock);
@@ -4529,21 +4653,23 @@ int drbd_asender(struct drbd_thread *thi)
4529 4653
4530 if (received == expect && cmd == NULL) { 4654 if (received == expect && cmd == NULL) {
4531 if (unlikely(h->magic != BE_DRBD_MAGIC)) { 4655 if (unlikely(h->magic != BE_DRBD_MAGIC)) {
4532 dev_err(DEV, "magic?? on meta m: 0x%lx c: %d l: %d\n", 4656 dev_err(DEV, "magic?? on meta m: 0x%08x c: %d l: %d\n",
4533 (long)be32_to_cpu(h->magic), 4657 be32_to_cpu(h->magic),
4534 h->command, h->length); 4658 be16_to_cpu(h->command),
4659 be16_to_cpu(h->length));
4535 goto reconnect; 4660 goto reconnect;
4536 } 4661 }
4537 cmd = get_asender_cmd(be16_to_cpu(h->command)); 4662 cmd = get_asender_cmd(be16_to_cpu(h->command));
4538 len = be16_to_cpu(h->length); 4663 len = be16_to_cpu(h->length);
4539 if (unlikely(cmd == NULL)) { 4664 if (unlikely(cmd == NULL)) {
4540 dev_err(DEV, "unknown command?? on meta m: 0x%lx c: %d l: %d\n", 4665 dev_err(DEV, "unknown command?? on meta m: 0x%08x c: %d l: %d\n",
4541 (long)be32_to_cpu(h->magic), 4666 be32_to_cpu(h->magic),
4542 h->command, h->length); 4667 be16_to_cpu(h->command),
4668 be16_to_cpu(h->length));
4543 goto disconnect; 4669 goto disconnect;
4544 } 4670 }
4545 expect = cmd->pkt_size; 4671 expect = cmd->pkt_size;
4546 ERR_IF(len != expect-sizeof(struct p_header)) 4672 ERR_IF(len != expect-sizeof(struct p_header80))
4547 goto reconnect; 4673 goto reconnect;
4548 } 4674 }
4549 if (received == expect) { 4675 if (received == expect) {
@@ -4553,7 +4679,7 @@ int drbd_asender(struct drbd_thread *thi)
4553 4679
4554 buf = h; 4680 buf = h;
4555 received = 0; 4681 received = 0;
4556 expect = sizeof(struct p_header); 4682 expect = sizeof(struct p_header80);
4557 cmd = NULL; 4683 cmd = NULL;
4558 } 4684 }
4559 } 4685 }
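
The asender loop assembles packets incrementally: expect starts at sizeof(struct p_header80), and once the header is complete the command lookup raises expect to the full pkt_size, with drbd_recv continuing into the same buffer until received == expect. A sketch of that two-phase accumulation over a stream socket (framing and sizes invented for illustration):

#include <stdint.h>
#include <string.h>
#include <unistd.h>

struct hdr { uint16_t cmd; uint16_t len; };

/* Read one framed packet into buf: header first, then the body.
 * Returns the total length, or -1 on error or overflow. */
static int recv_packet(int fd, unsigned char *buf, size_t bufsz)
{
	size_t received = 0, expect = sizeof(struct hdr);

	while (received < expect) {
		ssize_t r = read(fd, buf + received, expect - received);
		if (r <= 0)
			return -1;
		received += r;
		if (received == sizeof(struct hdr) && expect == sizeof(struct hdr)) {
			struct hdr h;
			memcpy(&h, buf, sizeof(h));	/* header complete */
			expect += h.len;		/* grow the target */
			if (expect > bufsz)
				return -1;
		}
	}
	return (int)received;
}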
@@ -4561,10 +4687,12 @@ int drbd_asender(struct drbd_thread *thi)
4561 if (0) { 4687 if (0) {
4562reconnect: 4688reconnect:
4563 drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); 4689 drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
4690 drbd_md_sync(mdev);
4564 } 4691 }
4565 if (0) { 4692 if (0) {
4566disconnect: 4693disconnect:
4567 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 4694 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
4695 drbd_md_sync(mdev);
4568 } 4696 }
4569 clear_bit(SIGNAL_ASENDER, &mdev->flags); 4697 clear_bit(SIGNAL_ASENDER, &mdev->flags);
4570 4698