author    Linus Torvalds <torvalds@linux-foundation.org>  2011-05-25 12:15:35 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2011-05-25 12:15:35 -0400
commit    929cfdd5d3bdc772aff32e5a3fb4e3894394aa75 (patch)
tree      f67202d079eaf1f8d65b2e1bfac70b768ae34bc4 /drivers/block/drbd
parent    798ce8f1cca29dcc3f4b55947f611f4ffb32ac2b (diff)
parent    a1c15c59feee36267c43142a41152fbf7402afb6 (diff)
Merge branch 'for-2.6.40/drivers' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.40/drivers' of git://git.kernel.dk/linux-2.6-block: (110 commits)
  loop: handle on-demand devices correctly
  loop: limit 'max_part' module param to DISK_MAX_PARTS
  drbd: fix warning
  drbd: fix warning
  drbd: Fix spelling
  drbd: fix schedule in atomic
  drbd: Take a more conservative approach when deciding max_bio_size
  drbd: Fixed state transitions after async outdate-peer-handler returned
  drbd: Disallow the peer_disk_state to be D_OUTDATED while connected
  drbd: Fix for the connection problems on high latency links
  drbd: fix potential activity log refcount imbalance in error path
  drbd: Only downgrade the disk state in case of disk failures
  drbd: fix disconnect/reconnect loop, if ping-timeout == ping-int
  drbd: fix potential distributed deadlock
  lru_cache.h: fix comments referring to ts_ instead of lc_
  drbd: Fix for application IO with the on-io-error=pass-on policy
  xen/p2m: Add EXPORT_SYMBOL_GPL to the M2P override functions.
  xen/p2m/m2p/gnttab: Support GNTMAP_host_map in the M2P override.
  xen/blkback: don't fail empty barrier requests
  xen/blkback: fix xenbus_transaction_start() hang caused by double xenbus_transaction_end()
  ...
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r--  drivers/block/drbd/drbd_actlog.c    |   2
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c    |   6
-rw-r--r--  drivers/block/drbd/drbd_int.h       |  19
-rw-r--r--  drivers/block/drbd/drbd_main.c      |  37
-rw-r--r--  drivers/block/drbd/drbd_nl.c        | 127
-rw-r--r--  drivers/block/drbd/drbd_receiver.c  |  68
-rw-r--r--  drivers/block/drbd/drbd_req.c       |  20
-rw-r--r--  drivers/block/drbd/drbd_req.h       |   5
-rw-r--r--  drivers/block/drbd/drbd_worker.c    |  98
9 files changed, 243 insertions, 139 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index c6828b68d77b..09ef9a878ef0 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -28,7 +28,7 @@
 #include "drbd_int.h"
 #include "drbd_wrappers.h"
 
-/* We maintain a trivial check sum in our on disk activity log.
+/* We maintain a trivial checksum in our on disk activity log.
  * With that we can ensure correct operation even when the storage
  * device might do a partial (last) sector write while losing power.
  */
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 76210ba401ac..f440a02dfdb1 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -74,7 +74,7 @@
  * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage
  * seems excessive.
  *
- * We plan to reduce the amount of in-core bitmap pages by pageing them in
+ * We plan to reduce the amount of in-core bitmap pages by paging them in
  * and out against their on-disk location as necessary, but need to make
  * sure we don't cause too much meta data IO, and must not deadlock in
  * tight memory situations. This needs some more work.
@@ -200,7 +200,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
  * we if bits have been cleared since last IO. */
 #define BM_PAGE_LAZY_WRITEOUT 28
 
-/* store_page_idx uses non-atomic assingment. It is only used directly after
+/* store_page_idx uses non-atomic assignment. It is only used directly after
  * allocating the page. All other bm_set_page_* and bm_clear_page_* need to
  * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap
  * changes) may happen from various contexts, and wait_on_bit/wake_up_bit
@@ -318,7 +318,7 @@ static void bm_unmap(unsigned long *p_addr)
 /* word offset from start of bitmap to word number _in_page_
  * modulo longs per page
 #define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long))
- hm, well, Philipp thinks gcc might not optimze the % into & (... - 1)
+ hm, well, Philipp thinks gcc might not optimize the % into & (... - 1)
  so do it explicitly:
  */
 #define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d871b14ed5a1..ef2ceed3be4b 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -700,7 +700,7 @@ struct drbd_request {
  * see drbd_endio_pri(). */
 struct bio *private_bio;
 
-struct hlist_node colision;
+struct hlist_node collision;
 sector_t sector;
 unsigned int size;
 unsigned int epoch; /* barrier_nr */
@@ -766,7 +766,7 @@ struct digest_info {
 
 struct drbd_epoch_entry {
 struct drbd_work w;
-struct hlist_node colision;
+struct hlist_node collision;
 struct drbd_epoch *epoch; /* for writes */
 struct drbd_conf *mdev;
 struct page *pages;
@@ -1129,6 +1129,8 @@ struct drbd_conf {
 int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
 int rs_planed; /* resync sectors already planned */
 atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
+int peer_max_bio_size;
+int local_max_bio_size;
 };
 
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1218,8 +1220,6 @@ extern void drbd_free_resources(struct drbd_conf *mdev);
 extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
 unsigned int set_size);
 extern void tl_clear(struct drbd_conf *mdev);
-enum drbd_req_event;
-extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
 extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
 extern void drbd_free_sock(struct drbd_conf *mdev);
 extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
@@ -1434,6 +1434,7 @@ struct bm_extent {
  * hash table. */
 #define HT_SHIFT 8
 #define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT))
+#define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */
 
 #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */
 
@@ -1518,9 +1519,9 @@ extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
 extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int);
 enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
-extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
+extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
-extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
+extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
 extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
 enum drbd_role new_role,
 int force);
@@ -1828,6 +1829,8 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
 if (!forcedetach) {
 if (__ratelimit(&drbd_ratelimit_state))
 dev_err(DEV, "Local IO failed in %s.\n", where);
+if (mdev->state.disk > D_INCONSISTENT)
+_drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL);
 break;
 }
 /* NOTE fall through to detach case if forcedetach set */
@@ -2153,6 +2156,10 @@ static inline int get_net_conf(struct drbd_conf *mdev)
 static inline void put_ldev(struct drbd_conf *mdev)
 {
 int i = atomic_dec_return(&mdev->local_cnt);
+
+/* This may be called from some endio handler,
+ * so we must not sleep here. */
+
 __release(local);
 D_ASSERT(i >= 0);
 if (i == 0) {
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 5b525c179f39..0358e55356c8 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -745,6 +745,9 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 mdev->agreed_pro_version < 88)
 rv = SS_NOT_SUPPORTED;
 
+else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
+rv = SS_CONNECTED_OUTDATES;
+
 return rv;
 }
 
@@ -1565,6 +1568,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 put_ldev(mdev);
 }
 
+/* Notify peer that I had a local IO error, and did not detached.. */
+if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT)
+drbd_send_state(mdev);
+
 /* Disks got bigger while they were detached */
 if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
 test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
@@ -2064,7 +2071,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
 {
 struct p_sizes p;
 sector_t d_size, u_size;
-int q_order_type;
+int q_order_type, max_bio_size;
 int ok;
 
 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
@@ -2072,17 +2079,20 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
 d_size = drbd_get_max_capacity(mdev->ldev);
 u_size = mdev->ldev->dc.disk_size;
 q_order_type = drbd_queue_order_type(mdev);
+max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
+max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
 put_ldev(mdev);
 } else {
 d_size = 0;
 u_size = 0;
 q_order_type = QUEUE_ORDERED_NONE;
+max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
 }
 
 p.d_size = cpu_to_be64(d_size);
 p.u_size = cpu_to_be64(u_size);
 p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
-p.max_bio_size = cpu_to_be32(queue_max_hw_sectors(mdev->rq_queue) << 9);
+p.max_bio_size = cpu_to_be32(max_bio_size);
 p.queue_order_type = cpu_to_be16(q_order_type);
 p.dds_flags = cpu_to_be16(flags);
 
@@ -2722,7 +2732,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
 
 /* double check digest, sometimes buffers have been modified in flight. */
 if (dgs > 0 && dgs <= 64) {
-/* 64 byte, 512 bit, is the larges digest size
+/* 64 byte, 512 bit, is the largest digest size
  * currently supported in kernel crypto. */
 unsigned char digest[64];
 drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
@@ -3041,6 +3051,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 mdev->agreed_pro_version = PRO_VERSION_MAX;
 mdev->write_ordering = WO_bdev_flush;
 mdev->resync_wenr = LC_FREE;
+mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
+mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
 }
 
 void drbd_mdev_cleanup(struct drbd_conf *mdev)
@@ -3275,7 +3287,7 @@ static void drbd_delete_device(unsigned int minor)
 
 drbd_release_ee_lists(mdev);
 
-/* should be free'd on disconnect? */
+/* should be freed on disconnect? */
 kfree(mdev->ee_hash);
 /*
 mdev->ee_hash_s = 0;
@@ -3415,7 +3427,9 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
 q->backing_dev_info.congested_data = mdev;
 
 blk_queue_make_request(q, drbd_make_request);
-blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE >> 9);
+/* Setting the max_hw_sectors to an odd value of 8kibyte here
+   This triggers a max_bio_size message upon first attach or connect */
+blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 blk_queue_merge_bvec(q, drbd_merge_bvec);
 q->queue_lock = &mdev->req_lock;
@@ -3627,7 +3641,8 @@ struct meta_data_on_disk {
 /* `-- act_log->nr_elements <-- sync_conf.al_extents */
 u32 bm_offset; /* offset to the bitmap, from here */
 u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */
-u32 reserved_u32[4];
+u32 la_peer_max_bio_size; /* last peer max_bio_size */
+u32 reserved_u32[3];
 
 } __packed;
 
@@ -3668,6 +3683,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
 buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);
 
 buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
+buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);
 
 D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
 sector = mdev->ldev->md.md_offset;
@@ -3751,6 +3767,15 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
 bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
 
+spin_lock_irq(&mdev->req_lock);
+if (mdev->state.conn < C_CONNECTED) {
+int peer;
+peer = be32_to_cpu(buffer->la_peer_max_bio_size);
+peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
+mdev->peer_max_bio_size = peer;
+}
+spin_unlock_irq(&mdev->req_lock);
+
 if (mdev->sync_conf.al_extents < 7)
 mdev->sync_conf.al_extents = 127;
 
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 03b29f78a37d..515bcd948a43 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -272,9 +272,28 @@ static int _try_outdate_peer_async(void *data)
 {
 struct drbd_conf *mdev = (struct drbd_conf *)data;
 enum drbd_disk_state nps;
+union drbd_state ns;
 
 nps = drbd_try_outdate_peer(mdev);
-drbd_request_state(mdev, NS(pdsk, nps));
+
+/* Not using
+drbd_request_state(mdev, NS(pdsk, nps));
+here, because we might were able to re-establish the connection
+in the meantime. This can only partially be solved in the state's
+engine is_valid_state() and is_valid_state_transition()
+functions.
+
+nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN.
+pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid,
+therefore we have to have the pre state change check here.
+*/
+spin_lock_irq(&mdev->req_lock);
+ns = mdev->state;
+if (ns.conn < C_WF_REPORT_PARAMS) {
+ns.pdsk = nps;
+_drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+}
+spin_unlock_irq(&mdev->req_lock);
 
 return 0;
 }
@@ -577,7 +596,7 @@ void drbd_resume_io(struct drbd_conf *mdev)
  * Returns 0 on success, negative return values indicate errors.
  * You should call drbd_md_sync() after calling this function.
  */
-enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
+enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
 {
 sector_t prev_first_sect, prev_size; /* previous meta location */
 sector_t la_size;
@@ -773,30 +792,78 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
 return 0;
 }
 
-void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) __must_hold(local)
+static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
 {
 struct request_queue * const q = mdev->rq_queue;
-struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
-int max_segments = mdev->ldev->dc.max_bio_bvecs;
-int max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
+int max_hw_sectors = max_bio_size >> 9;
+int max_segments = 0;
+
+if (get_ldev_if_state(mdev, D_ATTACHING)) {
+struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
+
+max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
+max_segments = mdev->ldev->dc.max_bio_bvecs;
+put_ldev(mdev);
+}
 
 blk_queue_logical_block_size(q, 512);
 blk_queue_max_hw_sectors(q, max_hw_sectors);
 /* This is the workaround for "bio would need to, but cannot, be split" */
 blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
 blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
-blk_queue_stack_limits(q, b);
 
-dev_info(DEV, "max BIO size = %u\n", queue_max_hw_sectors(q) << 9);
+if (get_ldev_if_state(mdev, D_ATTACHING)) {
+struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
+
+blk_queue_stack_limits(q, b);
 
 if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
 dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
 q->backing_dev_info.ra_pages,
 b->backing_dev_info.ra_pages);
 q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
+}
+put_ldev(mdev);
 }
 }
 
+void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
+{
+int now, new, local, peer;
+
+now = queue_max_hw_sectors(mdev->rq_queue) << 9;
+local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */
+peer = mdev->peer_max_bio_size; /* Eventually last known value, from meta data */
+
+if (get_ldev_if_state(mdev, D_ATTACHING)) {
+local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
+mdev->local_max_bio_size = local;
+put_ldev(mdev);
+}
+
+/* We may ignore peer limits if the peer is modern enough.
+Because new from 8.3.8 onwards the peer can use multiple
+BIOs for a single peer_request */
+if (mdev->state.conn >= C_CONNECTED) {
+if (mdev->agreed_pro_version < 94)
+peer = mdev->peer_max_bio_size;
+else if (mdev->agreed_pro_version == 94)
+peer = DRBD_MAX_SIZE_H80_PACKET;
+else /* drbd 8.3.8 onwards */
+peer = DRBD_MAX_BIO_SIZE;
+}
+
+new = min_t(int, local, peer);
+
+if (mdev->state.role == R_PRIMARY && new < now)
+dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now);
+
+if (new != now)
+dev_info(DEV, "max BIO size = %u\n", new);
+
+drbd_setup_queue_param(mdev, new);
+}
+
 /* serialize deconfig (worker exiting, doing cleanup)
  * and reconfig (drbdsetup disk, drbdsetup net)
  *
@@ -865,7 +932,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 struct block_device *bdev;
 struct lru_cache *resync_lru = NULL;
 union drbd_state ns, os;
-unsigned int max_bio_size;
 enum drbd_state_rv rv;
 int cp_discovered = 0;
 int logical_block_size;
@@ -1117,20 +1183,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 mdev->read_cnt = 0;
 mdev->writ_cnt = 0;
 
-max_bio_size = DRBD_MAX_BIO_SIZE;
-if (mdev->state.conn == C_CONNECTED) {
-/* We are Primary, Connected, and now attach a new local
- * backing store. We must not increase the user visible maximum
- * bio size on this device to something the peer may not be
- * able to handle. */
-if (mdev->agreed_pro_version < 94)
-max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
-else if (mdev->agreed_pro_version == 94)
-max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
-/* else: drbd 8.3.9 and later, stay with default */
-}
-
-drbd_setup_queue_param(mdev, max_bio_size);
+drbd_reconsider_max_bio_size(mdev);
 
 /* If I am currently not R_PRIMARY,
  * but meta data primary indicator is set,
@@ -1152,7 +1205,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
 set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
-dd = drbd_determin_dev_size(mdev, 0);
+dd = drbd_determine_dev_size(mdev, 0);
 if (dd == dev_size_error) {
 retcode = ERR_NOMEM_BITMAP;
 goto force_diskless_dec;
@@ -1281,11 +1334,19 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 struct drbd_nl_cfg_reply *reply)
 {
+enum drbd_ret_code retcode;
+int ret;
 drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
-reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
-if (mdev->state.disk == D_DISKLESS)
-wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
+retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
+/* D_FAILED will transition to DISKLESS. */
+ret = wait_event_interruptible(mdev->misc_wait,
+mdev->state.disk != D_FAILED);
 drbd_resume_io(mdev);
+if ((int)retcode == (int)SS_IS_DISKLESS)
+retcode = SS_NOTHING_TO_DO;
+if (ret)
+retcode = ERR_INTR;
+reply->ret_code = retcode;
 return 0;
 }
 
@@ -1658,7 +1719,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 
 mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
 ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
-dd = drbd_determin_dev_size(mdev, ddsf);
+dd = drbd_determine_dev_size(mdev, ddsf);
 drbd_md_sync(mdev);
 put_ldev(mdev);
 if (dd == dev_size_error) {
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index fd26666c0b08..25d32c5aa50a 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -333,7 +333,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
 if (!page)
 goto fail;
 
-INIT_HLIST_NODE(&e->colision);
+INIT_HLIST_NODE(&e->collision);
 e->epoch = NULL;
 e->mdev = mdev;
 e->pages = page;
@@ -356,7 +356,7 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i
 kfree(e->digest);
 drbd_pp_free(mdev, e->pages, is_net);
 D_ASSERT(atomic_read(&e->pending_bios) == 0);
-D_ASSERT(hlist_unhashed(&e->colision));
+D_ASSERT(hlist_unhashed(&e->collision));
 mempool_free(e, drbd_ee_mempool);
 }
 
@@ -787,7 +787,7 @@ static int drbd_connect(struct drbd_conf *mdev)
 }
 
 if (sock && msock) {
-schedule_timeout_interruptible(HZ / 10);
+schedule_timeout_interruptible(mdev->net_conf->ping_timeo*HZ/10);
 ok = drbd_socket_okay(mdev, &sock);
 ok = drbd_socket_okay(mdev, &msock) && ok;
 if (ok)
@@ -899,11 +899,6 @@ retry:
 
 drbd_thread_start(&mdev->asender);
 
-if (mdev->agreed_pro_version < 95 && get_ldev(mdev)) {
-drbd_setup_queue_param(mdev, DRBD_MAX_SIZE_H80_PACKET);
-put_ldev(mdev);
-}
-
 if (drbd_send_protocol(mdev) == -1)
 return -1;
 drbd_send_sync_param(mdev, &mdev->sync_conf);
@@ -1418,7 +1413,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u
 sector_t sector = e->sector;
 int ok;
 
-D_ASSERT(hlist_unhashed(&e->colision));
+D_ASSERT(hlist_unhashed(&e->collision));
 
 if (likely((e->flags & EE_WAS_ERROR) == 0)) {
 drbd_set_in_sync(mdev, sector, e->size);
@@ -1487,7 +1482,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
 return false;
 }
 
-/* hlist_del(&req->colision) is done in _req_may_be_done, to avoid
+/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
  * special casing it there for the various failure cases.
  * still no race with drbd_fail_pending_reads */
 ok = recv_dless_read(mdev, req, sector, data_size);
@@ -1558,11 +1553,11 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
  * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
 if (mdev->net_conf->two_primaries) {
 spin_lock_irq(&mdev->req_lock);
-D_ASSERT(!hlist_unhashed(&e->colision));
-hlist_del_init(&e->colision);
+D_ASSERT(!hlist_unhashed(&e->collision));
+hlist_del_init(&e->collision);
 spin_unlock_irq(&mdev->req_lock);
 } else {
-D_ASSERT(hlist_unhashed(&e->colision));
+D_ASSERT(hlist_unhashed(&e->collision));
 }
 
 drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
@@ -1579,8 +1574,8 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u
 ok = drbd_send_ack(mdev, P_DISCARD_ACK, e);
 
 spin_lock_irq(&mdev->req_lock);
-D_ASSERT(!hlist_unhashed(&e->colision));
-hlist_del_init(&e->colision);
+D_ASSERT(!hlist_unhashed(&e->collision));
+hlist_del_init(&e->collision);
 spin_unlock_irq(&mdev->req_lock);
 
 dec_unacked(mdev);
@@ -1755,7 +1750,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 
 spin_lock_irq(&mdev->req_lock);
 
-hlist_add_head(&e->colision, ee_hash_slot(mdev, sector));
+hlist_add_head(&e->collision, ee_hash_slot(mdev, sector));
 
 #define OVERLAPS overlaps(i->sector, i->size, sector, size)
 slot = tl_hash_slot(mdev, sector);
@@ -1765,7 +1760,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 int have_conflict = 0;
 prepare_to_wait(&mdev->misc_wait, &wait,
 TASK_INTERRUPTIBLE);
-hlist_for_each_entry(i, n, slot, colision) {
+hlist_for_each_entry(i, n, slot, collision) {
 if (OVERLAPS) {
 /* only ALERT on first iteration,
  * we may be woken up early... */
@@ -1804,7 +1799,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 }
 
 if (signal_pending(current)) {
-hlist_del_init(&e->colision);
+hlist_del_init(&e->collision);
 
 spin_unlock_irq(&mdev->req_lock);
 
@@ -1862,7 +1857,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 dev_err(DEV, "submit failed, triggering re-connect\n");
 spin_lock_irq(&mdev->req_lock);
 list_del(&e->w.list);
-hlist_del_init(&e->colision);
+hlist_del_init(&e->collision);
 spin_unlock_irq(&mdev->req_lock);
 if (e->flags & EE_CALL_AL_COMPLETE_IO)
 drbd_al_complete_io(mdev, e->sector);
@@ -2916,12 +2911,6 @@ disconnect:
 return false;
 }
 
-static void drbd_setup_order_type(struct drbd_conf *mdev, int peer)
-{
-/* sorry, we currently have no working implementation
- * of distributed TCQ */
-}
-
 /* warn if the arguments differ by more than 12.5% */
 static void warn_if_differ_considerably(struct drbd_conf *mdev,
 const char *s, sector_t a, sector_t b)
@@ -2939,7 +2928,6 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 {
 struct p_sizes *p = &mdev->data.rbuf.sizes;
 enum determine_dev_size dd = unchanged;
-unsigned int max_bio_size;
 sector_t p_size, p_usize, my_usize;
 int ldsc = 0; /* local disk size changed */
 enum dds_flags ddsf;
@@ -2994,7 +2982,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 
 ddsf = be16_to_cpu(p->dds_flags);
 if (get_ldev(mdev)) {
-dd = drbd_determin_dev_size(mdev, ddsf);
+dd = drbd_determine_dev_size(mdev, ddsf);
 put_ldev(mdev);
 if (dd == dev_size_error)
 return false;
@@ -3004,23 +2992,15 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 drbd_set_my_capacity(mdev, p_size);
 }
 
+mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
+drbd_reconsider_max_bio_size(mdev);
+
 if (get_ldev(mdev)) {
 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
 ldsc = 1;
 }
 
-if (mdev->agreed_pro_version < 94)
-max_bio_size = be32_to_cpu(p->max_bio_size);
-else if (mdev->agreed_pro_version == 94)
-max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
-else /* drbd 8.3.8 onwards */
-max_bio_size = DRBD_MAX_BIO_SIZE;
-
-if (max_bio_size != queue_max_hw_sectors(mdev->rq_queue) << 9)
-drbd_setup_queue_param(mdev, max_bio_size);
-
-drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type));
 put_ldev(mdev);
 }
 
@@ -4275,7 +4255,7 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev,
 struct hlist_node *n;
 struct drbd_request *req;
 
-hlist_for_each_entry(req, n, slot, colision) {
+hlist_for_each_entry(req, n, slot, collision) {
 if ((unsigned long)req == (unsigned long)id) {
 if (req->sector != sector) {
 dev_err(DEV, "_ack_id_to_req: found req %p but it has "
@@ -4554,6 +4534,7 @@ int drbd_asender(struct drbd_thread *thi)
 int received = 0;
 int expect = sizeof(struct p_header80);
 int empty;
+int ping_timeout_active = 0;
 
 sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
 
@@ -4566,6 +4547,7 @@ int drbd_asender(struct drbd_thread *thi)
 ERR_IF(!drbd_send_ping(mdev)) goto reconnect;
 mdev->meta.socket->sk->sk_rcvtimeo =
 mdev->net_conf->ping_timeo*HZ/10;
+ping_timeout_active = 1;
 }
 
 /* conditionally cork;
@@ -4620,8 +4602,7 @@ int drbd_asender(struct drbd_thread *thi)
 dev_err(DEV, "meta connection shut down by peer.\n");
 goto reconnect;
 } else if (rv == -EAGAIN) {
-if (mdev->meta.socket->sk->sk_rcvtimeo ==
-mdev->net_conf->ping_timeo*HZ/10) {
+if (ping_timeout_active) {
 dev_err(DEV, "PingAck did not arrive in time.\n");
 goto reconnect;
 }
@@ -4660,6 +4641,11 @@ int drbd_asender(struct drbd_thread *thi)
 if (!cmd->process(mdev, h))
 goto reconnect;
 
+/* the idle_timeout (ping-int)
+ * has been restored in got_PingAck() */
+if (cmd == get_asender_cmd(P_PING_ACK))
+ping_timeout_active = 0;
+
 buf = h;
 received = 0;
 expect = sizeof(struct p_header80);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 5c0c8be1bb0a..3424d675b769 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -163,7 +163,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
  * they must have been failed on the spot */
 #define OVERLAPS overlaps(sector, size, i->sector, i->size)
 slot = tl_hash_slot(mdev, sector);
-hlist_for_each_entry(i, n, slot, colision) {
+hlist_for_each_entry(i, n, slot, collision) {
 if (OVERLAPS) {
 dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; "
 "other: %p %llus +%u\n",
@@ -187,7 +187,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
 #undef OVERLAPS
 #define OVERLAPS overlaps(sector, size, e->sector, e->size)
 slot = ee_hash_slot(mdev, req->sector);
-hlist_for_each_entry(e, n, slot, colision) {
+hlist_for_each_entry(e, n, slot, collision) {
 if (OVERLAPS) {
 wake_up(&mdev->misc_wait);
 break;
@@ -260,8 +260,8 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
 
 /* remove the request from the conflict detection
  * respective block_id verification hash */
-if (!hlist_unhashed(&req->colision))
-hlist_del(&req->colision);
+if (!hlist_unhashed(&req->collision))
+hlist_del(&req->collision);
 else
 D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);
 
@@ -329,7 +329,7 @@ static int _req_conflicts(struct drbd_request *req)
 struct hlist_node *n;
 struct hlist_head *slot;
 
-D_ASSERT(hlist_unhashed(&req->colision));
+D_ASSERT(hlist_unhashed(&req->collision));
 
 if (!get_net_conf(mdev))
 return 0;
@@ -341,7 +341,7 @@ static int _req_conflicts(struct drbd_request *req)
 
 #define OVERLAPS overlaps(i->sector, i->size, sector, size)
 slot = tl_hash_slot(mdev, sector);
-hlist_for_each_entry(i, n, slot, colision) {
+hlist_for_each_entry(i, n, slot, collision) {
 if (OVERLAPS) {
 dev_alert(DEV, "%s[%u] Concurrent local write detected! "
 "[DISCARD L] new: %llus +%u; "
@@ -359,7 +359,7 @@ static int _req_conflicts(struct drbd_request *req)
 #undef OVERLAPS
 #define OVERLAPS overlaps(e->sector, e->size, sector, size)
 slot = ee_hash_slot(mdev, sector);
-hlist_for_each_entry(e, n, slot, colision) {
+hlist_for_each_entry(e, n, slot, collision) {
 if (OVERLAPS) {
 dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
 " [DISCARD L] new: %llus +%u; "
@@ -491,7 +491,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 
 /* so we can verify the handle in the answer packet
  * corresponding hlist_del is in _req_may_be_done() */
-hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector));
+hlist_add_head(&req->collision, ar_hash_slot(mdev, req->sector));
 
 set_bit(UNPLUG_REMOTE, &mdev->flags);
 
@@ -507,7 +507,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 /* assert something? */
 /* from drbd_make_request_common only */
 
-hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector));
+hlist_add_head(&req->collision, tl_hash_slot(mdev, req->sector));
 /* corresponding hlist_del is in _req_may_be_done() */
 
 /* NOTE
@@ -1033,7 +1033,7 @@ fail_conflicting:
 err = 0;
 
 fail_free_complete:
-if (rw == WRITE && local)
+if (req->rq_state & RQ_IN_ACT_LOG)
 drbd_al_complete_io(mdev, sector);
 fail_and_free_req:
 if (local) {
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 32e2c3e6a813..68a234a5fdc5 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -256,7 +256,7 @@ static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
 struct hlist_node *n;
 struct drbd_request *req;
 
-hlist_for_each_entry(req, n, slot, colision) {
+hlist_for_each_entry(req, n, slot, collision) {
 if ((unsigned long)req == (unsigned long)id) {
 D_ASSERT(req->sector == sector);
 return req;
@@ -291,7 +291,7 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
 req->epoch = 0;
 req->sector = bio_src->bi_sector;
 req->size = bio_src->bi_size;
-INIT_HLIST_NODE(&req->colision);
+INIT_HLIST_NODE(&req->collision);
 INIT_LIST_HEAD(&req->tl_requests);
 INIT_LIST_HEAD(&req->w.list);
 }
@@ -323,6 +323,7 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 extern void complete_master_bio(struct drbd_conf *mdev,
 struct bio_and_error *m);
 extern void request_timer_fn(unsigned long data);
+extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
 
 /* use this if you don't want to deal with calling complete_master_bio()
  * outside the spinlock, e.g. when walking some list on cleanup. */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index f7e6c92f8d03..4d76b06b6b20 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -126,7 +126,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
 list_del(&e->w.list); /* has been on active_ee or sync_ee */
 list_add_tail(&e->w.list, &mdev->done_ee);
 
-/* No hlist_del_init(&e->colision) here, we did not send the Ack yet,
+/* No hlist_del_init(&e->collision) here, we did not send the Ack yet,
  * neither did we wake possibly waiting conflicting requests.
  * done from "drbd_process_done_ee" within the appropriate w.cb
  * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */
@@ -297,42 +297,48 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *
 crypto_hash_final(&desc, digest);
 }
 
-static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+/* TODO merge common code with w_e_end_ov_req */
+int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
 struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
 int digest_size;
 void *digest;
-int ok;
+int ok = 1;
 
 D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef);
 
-if (unlikely(cancel)) {
-drbd_free_ee(mdev, e);
-return 1;
-}
+if (unlikely(cancel))
+goto out;
 
-if (likely((e->flags & EE_WAS_ERROR) == 0)) {
-digest_size = crypto_hash_digestsize(mdev->csums_tfm);
-digest = kmalloc(digest_size, GFP_NOIO);
-if (digest) {
-drbd_csum_ee(mdev, mdev->csums_tfm, e, digest);
+if (likely((e->flags & EE_WAS_ERROR) != 0))
+goto out;
 
-inc_rs_pending(mdev);
-ok = drbd_send_drequest_csum(mdev,
-e->sector,
-e->size,
-digest,
-digest_size,
-P_CSUM_RS_REQUEST);
-kfree(digest);
-} else {
-dev_err(DEV, "kmalloc() of digest failed.\n");
-ok = 0;
-}
-} else
-ok = 1;
+digest_size = crypto_hash_digestsize(mdev->csums_tfm);
+digest = kmalloc(digest_size, GFP_NOIO);
+if (digest) {
+sector_t sector = e->sector;
+unsigned int size = e->size;
+drbd_csum_ee(mdev, mdev->csums_tfm, e, digest);
+/* Free e and pages before send.
+ * In case we block on congestion, we could otherwise run into
+ * some distributed deadlock, if the other side blocks on
+ * congestion as well, because our receiver blocks in
+ * drbd_pp_alloc due to pp_in_use > max_buffers. */
+drbd_free_ee(mdev, e);
+e = NULL;
+inc_rs_pending(mdev);
+ok = drbd_send_drequest_csum(mdev, sector, size,
+digest, digest_size,
+P_CSUM_RS_REQUEST);
+kfree(digest);
+} else {
+dev_err(DEV, "kmalloc() of digest failed.\n");
+ok = 0;
+}
 
-drbd_free_ee(mdev, e);
+out:
+if (e)
+drbd_free_ee(mdev, e);
 
 if (unlikely(!ok))
 dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
@@ -834,7 +840,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 const int ratio =
 (t == 0) ? 0 :
 (t < 100000) ? ((s*100)/t) : (s/(t/100));
-dev_info(DEV, "%u %% had equal check sums, eliminated: %luK; "
+dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
 "transferred %luK total %luK\n",
 ratio,
 Bit2KB(mdev->rs_same_csum),
@@ -1071,9 +1077,12 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 return ok;
 }
 
+/* TODO merge common code with w_e_send_csum */
 int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
 struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+sector_t sector = e->sector;
+unsigned int size = e->size;
 int digest_size;
 void *digest;
 int ok = 1;
@@ -1093,17 +1102,25 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 else
 memset(digest, 0, digest_size);
 
+/* Free e and pages before send.
+ * In case we block on congestion, we could otherwise run into
+ * some distributed deadlock, if the other side blocks on
+ * congestion as well, because our receiver blocks in
+ * drbd_pp_alloc due to pp_in_use > max_buffers. */
+drbd_free_ee(mdev, e);
+e = NULL;
 inc_rs_pending(mdev);
-ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
-digest, digest_size, P_OV_REPLY);
+ok = drbd_send_drequest_csum(mdev, sector, size,
+digest, digest_size,
+P_OV_REPLY);
 if (!ok)
 dec_rs_pending(mdev);
 kfree(digest);
 
 out:
-drbd_free_ee(mdev, e);
+if (e)
+drbd_free_ee(mdev, e);
 dec_unacked(mdev);
-
 return ok;
 }
 
@@ -1122,8 +1139,10 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
 struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
 struct digest_info *di;
-int digest_size;
 void *digest;
+sector_t sector = e->sector;
+unsigned int size = e->size;
+int digest_size;
 int ok, eq = 0;
 
 if (unlikely(cancel)) {
@@ -1153,16 +1172,21 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 }
 }
 
-dec_unacked(mdev);
+/* Free e and pages before send.
+ * In case we block on congestion, we could otherwise run into
+ * some distributed deadlock, if the other side blocks on
+ * congestion as well, because our receiver blocks in
+ * drbd_pp_alloc due to pp_in_use > max_buffers. */
+drbd_free_ee(mdev, e);
 if (!eq)
-drbd_ov_oos_found(mdev, e->sector, e->size);
+drbd_ov_oos_found(mdev, sector, size);
 else
 ov_oos_print(mdev);
 
-ok = drbd_send_ack_ex(mdev, P_OV_RESULT, e->sector, e->size,
+ok = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
 
-drbd_free_ee(mdev, e);
+dec_unacked(mdev);
 
 --mdev->ov_left;
 